diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 168c05f..57c57f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,27 +14,28 @@ jobs: steps: - uses: actions/checkout@v4 - uses: EmbarkStudios/cargo-deny-action@v2 + with: + rust-version: "1.85.0" - uses: actions-rust-lang/setup-rust-toolchain@v1 with: components: rustfmt, clippy - run: | - cargo clippy --all-targets -F affinity,batching,retry \ + cargo clippy --all-targets -F affinity,local-batch,retry \ -- -D warnings $(cat .lints | cut -f1 -d"#" | tr '\n' ' ') - run: cargo fmt -- --check - - run: cargo doc -F affinity,batching,retry - - run: cargo test -F affinity,batching,retry --doc + - run: RUSTDOCFLAGS="-D warnings" cargo doc -F affinity,local-batch,retry + - run: cargo test -F affinity,local-batch,retry --doc coverage: name: Code coverage runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: EmbarkStudios/cargo-deny-action@v2 - uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - name: Generate code coverage - run: cargo llvm-cov --lcov --output-path lcov.info -F affinity,batching,retry + run: cargo llvm-cov --lcov --output-path lcov.info -F affinity,local-batch,retry - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 with: @@ -58,7 +59,6 @@ jobs: - loole steps: - uses: actions/checkout@v4 - - uses: EmbarkStudios/cargo-deny-action@v2 - uses: actions-rust-lang/setup-rust-toolchain@v1 - uses: actions-rs/cargo@v1 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index d1dd848..c5c30ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,36 @@ ## 0.3.0 +The general theme of this release is performance improvement by eliminating thread contention due to unnecessary locking of shared state. This required making some breaking changes to the API. + +* **Breaking** + * `beekeeper::hive::Hive` type signature has changed + * Removed the `W: Worker` parameter as it is redundant (it can be obtained from `Q::Kind`) + * Added `T: TaskQueues` to specify the `TaskQueues` implementation + * The `Builder` interface has been rewritten to enable maximum flexibility. + * `Builder` is now a trait that must be in scope. + * `ChannelBuilder` implements the previous builder functionality. + * `OpenBuilder` has no type parameters and can be specialized to create a `Hive` with any combination of `Queen` and `TaskQueues`. + * `BeeBuilder` and `FullBuilder` are intermediate types that generally should not be instantiated directly. + * `beekeeper::bee::Queen::create` now takes `&self` rather than `&mut self`. There is a new type, `beekeeper::bee::QueenMut`, with a `create(&mut self)` method, which must be wrapped in a `beekeeper::bee::QueenCell` to implement the `Queen` trait. This enables the `Hive` to create new workers without locking in the case of a `Queen` that does not need mutable state. + * `beekeeper::bee::Context` now takes a generic parameter that must be the input type of the `Worker`. + * `beekeeper::hive::Hive::try_into_husk` now has an `urgent` parameter to indicate whether queued tasks should be abandoned when shutting down the hive (`true`) or allowed to finish processing (`false`). + * The type of `attempt` and `max_retries` has been changed to `u8`. This reduces memory usage and should still allow for the majority of use cases. + * The `::of` methods have been removed from stock `Worker`s in favor of implementing `From`.
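To make the `Queen`/`QueenMut` split concrete, here is a minimal sketch; `CounterQueen` is a hypothetical type modeled on the `TestQueen` used in the unit tests later in this diff:

```rust
use beekeeper::bee::prelude::*;
use beekeeper::bee::stock::EchoWorker;

// A stateful queen: `create` mutates a counter, so it implements `QueenMut`
// rather than `Queen`.
#[derive(Debug, Default)]
struct CounterQueen(usize);

impl QueenMut for CounterQueen {
    type Kind = EchoWorker<usize>;

    fn create(&mut self) -> Self::Kind {
        self.0 += 1;
        EchoWorker::default()
    }
}

fn main() {
    // `QueenCell` supplies the interior mutability needed to satisfy the
    // now-immutable `Queen::create(&self)` that the `Hive` requires.
    let queen = QueenCell::new(CounterQueen::default());
    let _worker = queen.create(); // takes `&self`; locks internally
    assert_eq!(queen.get().0, 1);
}
```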
* Features - * Added the `batching` feature, which enables worker threads to queue up batches of tasks locally, which can alleviate contention between threads in the pool, especially when there are many short-lived tasks. + * Added the `TaskQueues` trait, which enables `Hive` to be specialized for different implementations of global (i.e., sending tasks from the `Hive` to worker threads) and local (i.e., worker thread-specific) queues. + * `ChannelTaskQueues` implements the existing behavior, using a channel for sending tasks. + * `WorkstealingTaskQueues` has been added to implement the workstealing pattern, based on `crossbeam-deque`. + * Added the `local-batch` feature, which enables worker threads to queue up batches of tasks locally, which can alleviate contention between threads in the pool, especially when there are many short-lived tasks. + * When this feature is enabled, tasks can optionally be weighted (by wrapping each input in `crate::hive::Weighted`) to help evenly distribute tasks with variable processing times. + * Enabling this feature should be transparent (i.e., it should not break existing code), and the `Hive`'s task submission methods support both weighted and unweighted inputs (due to the blanket implementation of `From<T> for Weighted<T>`); however, there are some cases where it is now necessary to specify the input type where before it could be elided. + * Added the `Context::submit` method, which enables tasks to submit new tasks to the `Hive`. +* Other + * Switched to using thread-local retry queues for the implementation of the `retry` feature, to reduce thread contention. + * Switched to storing `Outcome`s in the hive using a data structure that does not require locking when inserting, which should reduce thread contention when using `*_store` operations. + * Switched to using `crossbeam_channel` for the task input channel in `ChannelTaskQueues`. These are multi-producer, multi-consumer (MPMC) channels (as opposed to `std::sync::mpsc`, which is single-consumer), which means it is no longer necessary for worker threads to acquire a `Mutex` lock on the channel receiver when getting tasks. + * Added the `beekeeper::hive::mock` module, which has a `MockTaskRunner` for `apply`ing a worker in a mock context. This is useful for testing your `Worker`.
+ * Updated to `2024` edition and Rust version `1.85` ## 0.2.1 diff --git a/Cargo.toml b/Cargo.toml index 18b49ee..56119a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,26 +2,33 @@ name = "beekeeper" description = "A full-featured worker pool library for parallelizing tasks" version = "0.3.0" -edition = "2021" -rust-version = "1.80" +edition = "2024" +rust-version = "1.85" authors = ["John Didion "] repository = "https://github.com/jdidion/beekeeper" license = "MIT OR Apache-2.0" [dependencies] +crossbeam-channel = "0.5.13" crossbeam-deque = "0.8.6" +crossbeam-queue = "0.3.12" crossbeam-utils = "0.8.20" +derive_more = { version = "2.0.1", features = ["debug"] } +nanorand = { version = "0.7.0", default-features = false, features = [ + "std", + "tls", +] } num = "0.4.3" num_cpus = "1.16.0" parking_lot = "0.12.3" paste = "1.0.15" +simple-mermaid = "0.2.0" thiserror = "1.0.63" # required with the `affinity` feature core_affinity = { version = "0.8.1", optional = true } -# required with the `batching` feature -crossbeam-queue = { version = "0.3.12", optional = true } -# alternate channel implementations that can be enabled with features -crossbeam-channel = { version = "0.5.13", optional = true } +# required with alternate outcome channel implementations that can be enabled with features +# NOTE: these version requirements could be relaxed as we don't actually depend on the +# functionality of these crates internally (other than in tests) flume = { version = "0.11.1", optional = true } loole = { version = "0.4.0", optional = true } @@ -29,24 +36,31 @@ loole = { version = "0.4.0", optional = true } divan = "0.1.17" itertools = "0.14.0" serial_test = "3.2.0" -#rstest = "0.22.0" +rstest = "0.22.0" stacker = "0.1.17" +aquamarine = "0.6.0" +simple-mermaid = "0.2.0" [[bench]] name = "perf" harness = false [features] -default = [] +default = ["local-batch"] affinity = ["dep:core_affinity"] -batching = ["dep:crossbeam-queue"] +local-batch = [] retry = [] -crossbeam = ["dep:crossbeam-channel"] +crossbeam = [] flume = ["dep:flume"] loole = ["dep:loole"] +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ + 'cfg(coverage,coverage_nightly)', +] } + [package.metadata.cargo-all-features] -allowlist = ["affinity", "batching", "retry"] +allowlist = ["affinity", "local-batch", "retry"] [profile.release] lto = true diff --git a/README.md b/README.md index cc4e309..8117cb3 100644 --- a/README.md +++ b/README.md @@ -16,39 +16,37 @@ is sometimes called a "worker pool"). ### Overview * Operations are defined by implementing the [`Worker`](https://docs.rs/beekeeper/latest/beekeeper/bee/worker/trait.Worker.html) trait. -* A [`Builder`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html) is used to configure and create a worker pool - called a [`Hive`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html). +* A [`Builder`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/trait.Builder.html) is used to configure and create a worker pool called a [`Hive`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html). 
+* `Hive` is generic over + * The type of [`Queen`](https://docs.rs/beekeeper/latest/beekeeper/bee/queen/trait.Queen.html) which creates `Worker` instances + * The type of [`TaskQueues`](https://docs.rs/beekeeper/latest/beekeeper/hive/trait.TaskQueues.html), which provides the global and worker thread-local queues for managing tasks +* Currently, two `TaskQueues` implementations are available: + * Channel: uses a [`crossbeam`](https://github.com/crossbeam-rs/crossbeam) channel to send tasks from the `Hive` to worker threads + * When the `local-batch` feature is enabled, local batch queues are implemented using [`crossbeam_queue::ArrayQueue`](https://docs.rs/crossbeam/latest/crossbeam/queue/struct.ArrayQueue.html) + * Workstealing: * The `Hive` creates a `Worker` instance for each thread in the pool. * Each thread in the pool continually: - * Recieves a task from an input [`channel`](https://doc.rust-lang.org/stable/std/sync/mpsc/fn.channel.html), + * Receives a task from an input queue, * Calls its `Worker`'s [`apply`](https://docs.rs/beekeeper/latest/beekeeper/bee/worker/trait.Worker.html#method.apply) method on the input, and * Produces an [`Outcome`](https://docs.rs/beekeeper/latest/beekeeper/hive/outcome/outcome/enum.Outcome.html). * Depending on which of `Hive`'s methods are called to submit a task (or batch of tasks), the `Outcome`(s) may be returned as an `Iterator`, sent to an output `channel`, or stored in the `Hive` for later retrieval. * A `Hive` may create `Worker`s in one of three ways: - * Call the `default()` function on a `Worker` type that implements - [`Default`](std::default::Default) - * Clone an instance of a `Worker` that implements - [`Clone`](std::clone::Clone) - * Call the [`create()`](https://docs.rs/beekeeper/latest/beekeeper/bee/queen/trait.Queen.html#method.create) method on a worker factory that - implements the [`Queen`](https://docs.rs/beekeeper/latest/beekeeper/bee/queen/trait.Queen.html) trait. -* Both `Worker`s and `Queen`s may be stateful, i.e., `Worker::apply()` and `Queen::create()` - both take `&mut self`. + * Call the `default()` function on a `Worker` type that implements [`Default`](std::default::Default) + * Clone an instance of a `Worker` that implements [`Clone`](std::clone::Clone) + * Call the [`create()`](https://docs.rs/beekeeper/latest/beekeeper/bee/queen/trait.Queen.html#method.create) method on a worker factory that implements the [`Queen`](https://docs.rs/beekeeper/latest/beekeeper/bee/queen/trait.Queen.html) trait. +* A `Worker` may be stateful, i.e., `Worker::apply()` takes `&mut self` +* While `Queen` is not stateful, [`QueenMut`](https://docs.rs/beekeeper/latest/beekeeper/bee/queen/trait.QueenMut.html) may be (i.e., its `create()` method takes `&mut self`) * Although it is strongly recommended to avoid `panic`s in worker threads (and thus, within `Worker` implementations), the `Hive` does automatically restart any threads that panic. -* A `Hive` may be [`suspend`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html#method.suspend)ed and - [`resume`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html#method.resume)d at any time. When a `Hive` is suspended, worker threads - do no work and tasks accumulate in the input `channel`. -* Several utility functions are provided in the [util](https://docs.rs/beekeeper/latest/beekeeper/util/) module. Notably, the `map` - and `try_map` functions enable simple parallel processing of a single batch of tasks.
-* Several useful `Worker` implementations are provided in the [stock](https://docs.rs/beekeeper/latest/beekeeper/bee/stock/) module. - Most notable are those in the [`call`](https://docs.rs/beekeeper/latest/beekeeper/bee/stock/call/) submodule, which provide - different ways of wrapping `callable`s, i.e., closures and function pointers. +* A `Hive` may be [`suspend`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html#method.suspend)ed and [`resume`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html#method.resume)d at any time. When a `Hive` is suspended, worker threads do no work and tasks accumulate in the input queue. +* Several utility functions are provided in the [util](https://docs.rs/beekeeper/latest/beekeeper/util/) module. Notably, the `map` and `try_map` functions enable simple parallel processing of a single batch of tasks. +* Several useful `Worker` implementations are provided in the [stock](https://docs.rs/beekeeper/latest/beekeeper/bee/stock/) module. Most notable are those in the [`call`](https://docs.rs/beekeeper/latest/beekeeper/bee/stock/call/) submodule, which provide different ways of wrapping `callable`s, i.e., closures and function pointers. * The following optional features are provided via feature flags: - * `affinity`: worker threads may be pinned to CPU cores to minimize the overhead of - context-switching. - * `batching` (>=0.3.0): worker threads take batches of tasks from the input channel and queue them locally, which may alleviate thread contention, especially when there are many short-lived tasks. + * `affinity`: worker threads may be pinned to CPU cores to minimize the overhead of context-switching. + * `local-batch` (>=0.3.0): worker threads take batches of tasks from the global input queue and add them to a local queue, which may alleviate thread contention, especially when there are many short-lived tasks. + * Tasks may be [`Weighted`](https://docs.rs/beekeeper/latest/beekeeper/hive/weighted/struct.Weighted.html) to enable balancing unevenly sized tasks between worker threads (see the sketch after the Status section below). * `retry`: Tasks that fail due to transient errors (e.g., temporarily unavailable resources) may be retried a set number of times, with an optional, exponentially increasing delay between retries. @@ -65,31 +63,27 @@ To parallelize a task, you'll need two things: * Implement your own (See Example 3 below) * `use` the necessary traits (e.g., `use beekeeper::bee::prelude::*`) * Define a `struct` for your worker - * Implement the `Worker` trait on your struct and define the `apply` method with the - logic of your task + * Implement the `Worker` trait on your struct and define the `apply` method with the logic of your task * Do at least one of the following: * Implement `Default` for your worker * Implement `Clone` for your worker - * Create a custom worker fatory that implements the `Queen` trait + * Create a custom worker factory that implements the `Queen` or `QueenMut` trait 2. A `Hive` to execute your tasks.
Your options are: * Use one of the convenience methods in the [util](https://docs.rs/beekeeper/latest/beekeeper/util/) module (see Example 1 below) - * Create a `Hive` manually using [`Builder`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html) (see Examples 2 - and 3 below) - * [`Builder::new()`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.new) creates an empty `Builder` - * [`Builder::default()`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.default) creates a `Builder` + * Create a `Hive` manually using a [`Builder`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/trait.Builder.html) (see Examples 2 and 3 below) + * [`OpenBuilder`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html) is the most general builder + * [`OpenBuilder::new()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.new) creates an empty `OpenBuilder` + * [`Builder::default()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.default) creates an `OpenBuilder` with the global default settings (which may be changed using the functions in the [`hive`](https://docs.rs/beekeeper/latest/beekeeper/hive/) module, e.g., `beekeeper::hive::set_num_threads_default(4)`). - * Use one of the `build_*` methods to build the `Hive`: - * If you have a `Worker` that implements `Default`, use - [`build_with_default::<MyWorker>()`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.build_with_default) - * If you have a `Worker` that implements `Clone`, use - [`build_with(MyWorker::new())`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.build_with) - * If you have a custom `Queen`, use - [`build_default::<MyQueen>()`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.build_default) if it implements - `Default`, otherwise use [`build(MyQueen::new())`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.build) - * Note that [`Builder::num_threads()`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.num_threads) must be set - to a non-zero value, otherwise the built `Hive` will not start any worker threads - until you call the [`Hive::grow()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html#method.grow) method.
+ * The builder must be specialized for the `Queen` and `TaskQueues` types: + * If you have a `Worker` that implements `Default`, use [`with_worker_default::<MyWorker>()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_worker_default) + * If you have a `Worker` that implements `Clone`, use [`with_worker(MyWorker::new())`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_worker) + * If you have a custom `Queen`, use [`with_queen_default::<MyQueen>()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_queen_default) if it implements `Default`, otherwise use [`with_queen(MyQueen::new())`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_queen) + * If you have a custom `QueenMut`, use [`with_queen_mut_default::<MyQueenMut>()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_queen_mut_default) if it implements `Default`, otherwise use [`with_queen_mut(MyQueenMut::new())`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_queen_mut) + * Use the [`with_channel_queues`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_channel_queues) or [`with_workstealing_queues`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.OpenBuilder.html#method.with_workstealing_queues) method to configure the `TaskQueues` implementation + * Use the `build()` methods to build the `Hive` + * Note that [`Builder::num_threads()`](https://docs.rs/beekeeper/latest/beekeeper/hive/builder/struct.Builder.html#method.num_threads) must be set to a non-zero value, otherwise the built `Hive` will not start any worker threads until you call the [`Hive::grow()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html#method.grow) method. Once you've created a `Hive`, use its methods to submit tasks for processing. There are four groups of methods available: @@ -98,7 +92,7 @@ four groups of methods available: that implements `IntoIterator`) * `map`: submits an arbitrary batch of tasks (i.e., anything that implements `IntoIterator`) * `scan`: Similar to `map`, but you also provide 1) an initial value for a state variable, and - 2) a function that transforms each item in the input iterator into the input type required by + 1) a function that transforms each item in the input iterator into the input type required by the `Worker`, and also has access to (and may modify) the state variable. There are multiple methods in each group that differ by how the task results (called @@ -108,7 +102,7 @@ There are multiple methods in each group that differ by how the task results (ca * The methods with the `_unordered` suffix instead return an unordered iterator, which may be more performant than the ordered iterator * The methods with the `_send` suffix accept a channel `Sender` and send the `Outcome`s to that - channel as they are completed + channel as they are completed (see this [note](https://docs.rs/beekeeper/latest/beekeeper/hive/index.html#outcome-channels)). * The methods with the `_store` suffix store the `Outcome`s in the `Hive`; these may be retrieved later using the [`Hive::take_stored()`](https://docs.rs/beekeeper/latest/beekeeper/hive/struct.Hive.html#method.take_stored) method, using one of the `remove*` methods (which requires @@ -170,14 +164,14 @@ let hive = Builder::new() // return results to your own channel...
let (tx, rx) = outcome_channel(); let _ = hive.swarm_send( - (0..10).map(|i: i32| Thunk::of(move || i * i)), + (0..10).map(|i: i32| Thunk::from(move || i * i)), tx ); assert_eq!(285, rx.into_outputs().take(10).sum()); // return results as an iterator... let total = hive - .swarm_unordered((0..10).map(|i: i32| Thunk::of(move || i * -i))) + .swarm_unordered((0..10).map(|i: i32| Thunk::from(move || i * -i))) .into_outputs() .sum(); assert_eq!(-285, total); @@ -230,7 +224,7 @@ impl Worker for CatWorker { fn apply( &mut self, input: Self::Input, - _: &Context + _: &Context<Self::Input> ) -> WorkerResult<Self> { self.write_char(input).map_err(|error| { ApplyError::Fatal { input: Some(input), error } @@ -252,7 +246,7 @@ impl CatQueen { } } -impl Queen for CatQueen { +impl QueenMut for CatQueen { type Kind = CatWorker; fn create(&mut self) -> Self::Kind { @@ -288,7 +282,7 @@ impl Drop for CatQueen { // build the Hive let hive = Builder::new() .num_threads(4) - .build_default::<CatQueen>() + .build_default_mut::<CatQueen>() .unwrap(); // prepare inputs @@ -311,8 +305,11 @@ assert_eq!(output, b"abcdefgh"); // shutdown the hive, use the Queen to wait on child processes, and // report errors let (mut queen, _outcomes) = hive.try_into_husk().unwrap().into_parts(); -let (wait_ok, wait_err): (Vec<_>, Vec<_>) = - queen.wait_for_all().into_iter().partition(Result::is_ok); +let (wait_ok, wait_err): (Vec<_>, Vec<_>) = queen + .into_inner() + .wait_for_all() + .into_iter() + .partition(Result::is_ok); if !wait_err.is_empty() { panic!( "Error(s) occurred while waiting for child processes: {:?}", @@ -337,9 +334,9 @@ if !exec_err_codes.is_empty() { ## Status -The `beekeeper` API is generally considered to be stable, but additional real-world battle-testing -is desired before promoting the version to `1.0.0`. If you identify bugs or have suggestions for -improvement, please [open an issue](https://github.com/jdidion/beekeeper/issues). +Early versions of this crate (< 0.3) had some fatal design flaws that needed to be corrected with breaking changes (see the [changelog](CHANGELOG.md)). + +As of version 0.3, the `beekeeper` API is generally considered to be stable, but additional real-world battle-testing is desired before promoting the version to `1.0.0`. If you identify bugs or have suggestions for improvement, please [open an issue](https://github.com/jdidion/beekeeper/issues).
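As a sketch of the `local-batch` weighting mentioned in the features list above: the builder chain below mirrors the one in `benches/perf.rs` later in this diff, but the `Weighted::new(input, weight)` constructor is an assumption (this diff only shows that inputs can be wrapped in `Weighted`), and the weights are arbitrary:

```rust
use beekeeper::bee::stock::{Thunk, ThunkWorker};
use beekeeper::hive::{Builder, ChannelBuilder, TaskQueuesBuilder, Weighted, outcome_channel};

fn main() {
    let hive = ChannelBuilder::empty()
        .num_threads(4)
        .with_worker_default::<ThunkWorker<u64>>()
        .build();

    let (tx, rx) = outcome_channel();
    for i in 0..8u64 {
        // Heavier tasks get larger weights so that one worker's local batch
        // does not fill up with several expensive tasks at once.
        let weight = if i % 2 == 0 { 10 } else { 1 };
        let task = Thunk::from(move || i * i);
        // Assumed constructor: Weighted::new(input, weight).
        hive.apply_send(Weighted::new(task, weight), &tx);
    }
    hive.join();
    assert_eq!(rx.into_outputs().take(8).sum::<u64>(), 140);
}
```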
## Similar libraries diff --git a/benches/perf.rs b/benches/perf.rs index 53175fb..2088bc8 100644 --- a/benches/perf.rs +++ b/benches/perf.rs @@ -1,6 +1,6 @@ use beekeeper::bee::stock::EchoWorker; -use beekeeper::hive::{outcome_channel, Builder}; -use divan::{bench, black_box_drop, AllocProfiler, Bencher}; +use beekeeper::hive::{Builder, ChannelBuilder, TaskQueuesBuilder, outcome_channel}; +use divan::{AllocProfiler, Bencher, bench, black_box_drop}; use itertools::iproduct; #[global_allocator] @@ -11,13 +11,14 @@ const TASKS: &[usize] = &[1, 100, 10_000, 1_000_000]; #[bench(args = iproduct!(THREADS, TASKS))] fn bench_apply_short_task(bencher: Bencher, (num_threads, num_tasks): (&usize, &usize)) { - let hive = Builder::new() + let hive = ChannelBuilder::empty() .num_threads(*num_threads) - .build_with_default::<EchoWorker<usize>>(); + .with_worker_default::<EchoWorker<usize>>() + .build(); bencher.bench_local(|| { let (tx, rx) = outcome_channel(); for i in 0..*num_tasks { - hive.apply_send(i, tx.clone()); + hive.apply_send(i, &tx); } hive.join(); rx.into_iter().take(*num_tasks).for_each(black_box_drop); diff --git a/deny.toml b/deny.toml index 2de519b..a03f0c0 100644 --- a/deny.toml +++ b/deny.toml @@ -70,10 +70,7 @@ feature-depth = 1 # A list of advisory IDs to ignore. Note that ignored advisories will still # output a note when they are encountered. ignore = [ - #"RUSTSEC-0000-0000", - #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, - #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish - #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, + { id = "RUSTSEC-2024-0436", reason = "paste is considered 'finished'" }, ] # If this is true, then cargo deny will use the git executable to fetch advisory database. # If this is false, then it uses a built-in git library. @@ -88,7 +85,7 @@ ignore = [ # List of explicitly allowed licenses # See https://spdx.org/licenses/ for list of possible licenses # [possible values: any SPDX 3.11 short identifier (+ optional exception)]. -allow = ["MIT", "Apache-2.0", "Unicode-DFS-2016", "Unicode-3.0"] +allow = ["MIT", "Apache-2.0", "Unicode-3.0"] # The confidence threshold for detecting a license from license text. # The higher the value, the more closely the license text must be to the # canonical license text of a valid SPDX license file. diff --git a/src/atomic.rs b/src/atomic.rs index ec3d74c..3b58e3e 100644 --- a/src/atomic.rs +++ b/src/atomic.rs @@ -4,7 +4,6 @@ //! TODO: The `Atomic` and `AtomicNumeric` traits and implementations could be replaced with the //! equivalents from the `atomic`, `atomig`, or `radium` crates, but none of those seem to be //! well-maintained at this point. - pub use num::PrimInt; use paste::paste; use std::fmt::Debug; @@ -28,8 +27,6 @@ pub trait Atomic: Clone + Debug + Default + From pub struct Orderings { pub load: Ordering, pub swap: Ordering, - pub fetch_update_set: Ordering, - pub fetch_update_fetch: Ordering, pub fetch_add: Ordering, pub fetch_sub: Ordering, } @@ -39,8 +36,6 @@ impl Default for Orderings { Orderings { load: Ordering::Acquire, swap: Ordering::Release, - fetch_update_set: Ordering::AcqRel, - fetch_update_fetch: Ordering::Acquire, fetch_add: Ordering::AcqRel, fetch_sub: Ordering::AcqRel, } @@ -146,6 +141,7 @@ macro_rules!
atomic_int { } atomic!(bool); +atomic_int!(u8); atomic_int!(u32); atomic_int!(u64); atomic_int!(usize); @@ -365,6 +361,7 @@ mod affinity { } #[cfg(test)] + #[cfg_attr(coverage_nightly, coverage(off))] mod tests { use crate::atomic::{AtomicAny, AtomicOption, MutError}; @@ -392,8 +389,8 @@ mod affinity { } } -#[cfg(feature = "batching")] -mod batching { +#[cfg(any(feature = "local-batch", feature = "retry"))] +mod local_batch { use super::{Atomic, AtomicOption, MutError}; use std::fmt::Debug; @@ -415,6 +412,7 @@ mod batching { } #[cfg(test)] + #[cfg_attr(coverage_nightly, coverage(off))] mod tests { use crate::atomic::{AtomicOption, AtomicUsize, MutError}; @@ -438,6 +436,7 @@ mod batching { } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::*; use paste::paste; @@ -462,6 +461,7 @@ mod tests { }; } + test_numeric_type!(AtomicU8); test_numeric_type!(AtomicU32); test_numeric_type!(AtomicU64); test_numeric_type!(AtomicUsize); diff --git a/src/barrier.rs b/src/barrier.rs index 51c2206..9ee84ff 100644 --- a/src/barrier.rs +++ b/src/barrier.rs @@ -1,12 +1,12 @@ use parking_lot::RwLock; use std::collections::HashSet; -/// Enables multiple threads to synchronize the beginning of some computation. Unlike -/// [`std::sync::Barrier`], this one keeps track of which threads have reached it and only -/// recognizes the first wait from each thread. use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Barrier}; use std::thread::{self, ThreadId}; +/// Enables multiple threads to synchronize the beginning of some computation. Unlike +/// [`std::sync::Barrier`], this one keeps track of which threads have reached it and only +/// recognizes the first wait from each thread. #[derive(Clone)] pub struct IndexedBarrier(Arc); diff --git a/src/bee/context.rs b/src/bee/context.rs index e2b9315..ea7c8ba 100644 --- a/src/bee/context.rs +++ b/src/bee/context.rs @@ -1,57 +1,182 @@ //! The context for a task processed by a `Worker`. -use crate::atomic::{Atomic, AtomicBool}; +use std::cell::RefCell; use std::fmt::Debug; -use std::sync::Arc; +/// Type of unique ID for a task within the `Hive`. pub type TaskId = usize; -/// Context for a task. -#[derive(Debug, Default)] -pub struct Context { - task_id: TaskId, - cancelled: Arc, - #[cfg(feature = "retry")] - attempt: u32, +/// Trait that provides a `Context` with limited access to a worker thread's state during +/// task execution. +pub trait LocalContext: Debug { + /// Returns `true` if tasks in progress should be cancelled. + fn should_cancel_tasks(&self) -> bool; + + /// Submits a new task to the `Hive` that is executing the current task. + fn submit_task(&self, input: I) -> TaskId; + + #[cfg(test)] + fn thread_index(&self) -> usize; +} + +/// The context visible to a task when processing an input. +#[derive(Debug)] +pub struct Context<'a, I> { + meta: TaskMeta, + local: Option<&'a dyn LocalContext>, + subtask_ids: RefCell>>, } -impl Context { - /// Creates a new `Context` with the given task_id and shared cancellation status. - pub fn new(task_id: TaskId, cancelled: Arc) -> Self { +impl<'a, I> Context<'a, I> { + /// Returns a new empty context. This is primarily useful for testing. + pub fn empty() -> Self { Self { - task_id, - cancelled, - #[cfg(feature = "retry")] - attempt: 0, + meta: TaskMeta::default(), + local: None, + subtask_ids: RefCell::new(None), } } - /// Creates an empty `Context`. 
- pub fn empty() -> Self { - Self::new(0, Arc::new(AtomicBool::from(false))) + /// Creates a new `Context` with the given task metadata and shared state. + pub fn new(meta: TaskMeta, local: Option<&'a dyn LocalContext<I>>) -> Self { + Self { + meta, + local, + subtask_ids: RefCell::new(None), + } } - /// The task_id of this task within the `Hive`. + /// The unique ID of this task within the `Hive`. pub fn task_id(&self) -> TaskId { - self.task_id + self.meta.id() + } + + /// Returns the number of previous failed attempts to execute the current task. + pub fn attempt(&self) -> u8 { + self.meta.attempt() } - /// Returns `true` if the task has been cancelled. A long-running `Worker` should check this - /// periodically and, if it returns `true`, exit early with an `ApplyError::Cancelled` result. + /// Returns `true` if the current task should be cancelled. + /// + /// A long-running `Worker` should check this periodically and, if it returns `true`, exit + /// early with an `ApplyError::Cancelled` result. pub fn is_cancelled(&self) -> bool { - self.cancelled.get() + self.local + .as_ref() + .map(|local| local.should_cancel_tasks()) + .unwrap_or(false) + } + + /// Submits a new task to the `Hive` that is executing the current task. + /// + /// If a thread-local queue is available and has capacity, the task will be added to it, + /// otherwise it is added to the global queue. The ID of the submitted task is stored in this + /// `Context` and ultimately returned in the `subtask_ids` of the `Outcome` of the submitting + /// task. + /// + /// The task will be submitted with the same outcome sender as the current task, or stored in + /// the `Hive` if there is no sender. + /// + /// Returns an `Err` containing `input` if the new task was not successfully submitted. + pub fn submit(&self, input: I) -> Result<(), I> { + if let Some(local) = self.local.as_ref() { + let task_id = local.submit_task(input); + self.subtask_ids + .borrow_mut() + .get_or_insert_default() + .push(task_id); + Ok(()) + } else { + Err(input) + } + } + + /// Returns the unique index of the worker thread executing this task. + #[cfg(test)] + pub fn thread_index(&self) -> Option<usize> { + self.local.map(|local| local.thread_index()) + } + + /// Consumes this `Context` and returns the IDs of the subtasks spawned during the execution + /// of the task, if any. + pub(crate) fn into_parts(self) -> (TaskMeta, Option<Vec<TaskId>>) { + (self.meta, self.subtask_ids.into_inner()) } } -#[cfg(feature = "retry")] -impl Context { - /// The current retry attempt. The value is `0` for the first attempt and increments by `1` for - /// each retry attempt (if any). - pub fn attempt(&self) -> u32 { - self.attempt +/// The metadata of a task. +#[derive(Clone, Debug, Default)] +pub struct TaskMeta { + id: TaskId, + #[cfg(feature = "local-batch")] + weight: u32, + #[cfg(feature = "retry")] + attempt: u8, +} + +impl TaskMeta { + /// Creates a new `TaskMeta` with the given task ID. + pub fn new(id: TaskId) -> Self { + TaskMeta { + id, + ..Default::default() + } + } + + /// Creates a new `TaskMeta` with the given task ID and weight. + #[cfg(feature = "local-batch")] + pub fn with_weight(task_id: TaskId, weight: u32) -> Self { + TaskMeta { + id: task_id, + weight, + ..Default::default() + } + } + + /// Returns the unique ID of this task within the `Hive`. + pub fn id(&self) -> TaskId { + self.id } - /// Increments the retry attempt. + /// Returns the number of previous failed attempts to execute the current task.
+ /// + /// Always returns `0` if the `retry` feature is not enabled. + pub fn attempt(&self) -> u8 { + #[cfg(feature = "retry")] + return self.attempt; + #[cfg(not(feature = "retry"))] + return 0; + } + + /// Increments the number of previous failed attempts to execute the current task. + #[cfg(feature = "retry")] pub(crate) fn inc_attempt(&mut self) { self.attempt += 1; } + + /// Returns the task weight. + /// + /// Always returns `0` if the `local-batch` feature is not enabled. + pub fn weight(&self) -> u32 { + #[cfg(feature = "local-batch")] + return self.weight; + #[cfg(not(feature = "local-batch"))] + return 0; + } +} + +impl From<TaskId> for TaskMeta { + fn from(value: TaskId) -> Self { + TaskMeta::new(value) + } +} + +#[cfg(all(test, feature = "retry"))] +impl TaskMeta { + pub fn with_attempt(task_id: TaskId, attempt: u8) -> Self { + Self { + id: task_id, + attempt, + ..Default::default() + } + } } diff --git a/src/bee/error.rs b/src/bee/error.rs index 5a0137e..0cc5138 100644 --- a/src/bee/error.rs +++ b/src/bee/error.rs @@ -91,6 +91,7 @@ impl From for ApplyRefError { } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::ApplyError; use crate::panic::Panic; diff --git a/src/bee/mod.rs b/src/bee/mod.rs index b72fdc9..244eb85 100644 --- a/src/bee/mod.rs +++ b/src/bee/mod.rs @@ -3,6 +3,14 @@ //! A [`Hive`](crate::hive::Hive) is populated by bees: //! * The [`Worker`]s process the tasks submitted to the `Hive`. //! * The [`Queen`] creates a new `Worker` for each thread in the `Hive`. +//! * [`QueenMut`] can be used to implement a stateful queen - it must be wrapped in a +//! [`QueenCell`] to make it thread-safe. +//! +//! It is easiest to use the [`prelude`] when implementing your bees: +//! +//! ``` +//! use beekeeper::bee::prelude::*; +//! ``` //! //! # Worker //! @@ -18,7 +26,15 @@ //! //! The `Worker` trait has a single method, [`apply`](crate::bee::Worker::apply), which //! takes an input of type `Input` and a [`Context`] and returns a `Result` containing either an -//! `Output` or an [`ApplyError`]. +//! `Output` or an [`ApplyError`]. Note that `Worker::apply()` takes a `&mut self` parameter, +//! meaning that it can modify its own state. +//! +//! If a fatal error occurs during processing of the task, the worker should return +//! [`ApplyError::Fatal`]. +//! +//! If the task instead fails due to a transient error, the worker should return +//! [`ApplyError::Retryable`]. If the `retry` feature is enabled, then a task that fails with a +//! `ApplyError::Retryable` error will be retried, otherwise the error is converted to `Fatal`. //! //! The `Context` contains information about the task, including: //! * The task ID. Each task submitted to a `Hive` is assigned an ID that is unique within @@ -29,16 +45,14 @@ //! periodically check the cancellation flag by calling //! [`Context::is_cancelled()`](crate::bee::context::Context::is_cancelled). If the cancellation //! flag is set, the worker may terminate early by returning [`ApplyError::Cancelled`]. -//! * If the `retry` feature is enabled, the `Context` also contains the retry -//! [`attempt`](crate::bee::context::Context::attempt), which starts at `0` the first time the task -//! is attempted and increments by `1` for each subsequent retry attempt. -//! -//! If a fatal error occurs during processing of the task, the worker should return -//! [`ApplyError::Fatal`]. +//! * The retry [`attempt`](crate::bee::context::Context::attempt), which starts at `0` the first +//! time the task is attempted.
If the `retry` feature is enabled and the task fails with +//! [`ApplyError::Retryable`], this value increments by `1` for each subsequent retry attempt. //! -//! If the task instead fails due to a transient error, the worker should return -//! `ApplyError::Retryable`. If the `retry` feature is enabled, then a task that fails with a -//! `ApplyError::Retryable` error will be retried, otherwise the error is converted to `Fatal`. +//! The `Context` also provides the ability to submit new tasks to the `Hive` using the +//! [`submit`](crate::bee::Context::submit) method. The IDs of submitted subtasks are stored in the +//! `Context` and are returned in a field of the [`Outcome`](crate::hive::Outcome) that results +//! from the parent task. //! //! A `Worker` should not panic. However, if it must execute code that may panic, it can do so //! within a closure passed to [`Panic::try_call`](crate::panic::Panic::try_call) and convert an @@ -85,25 +99,18 @@ //! A queen is defined by implementing the [`Queen`] trait. A single `Queen` instance is used to //! create the `Worker` instances for each worker thread in a `Hive`. //! -//! It is often not necessary to manually implement the `Queen` trait. For exmaple, if your `Worker` +//! If you need the queen to have mutable state, you can instead implement [`QueenMut`], whose +//! [`create`](crate::bee::QueenMut::create) method takes `&mut self` as a parameter. When +//! creating a `Hive`, the `QueenMut` must be wrapped in a [`QueenCell`] to make it thread-safe. +//! +//! It is often not necessary to manually implement the `Queen` trait. For example, if your `Worker` //! implements `Default`, then you can use [`DefaultQueen`] implicitly by calling -//! [`Builder::build_with_default`](crate::hive::Builder::build_with_default). Similarly, +//! [`OpenBuilder::with_worker_default`](crate::hive::OpenBuilder::with_worker_default). Similarly, //! if your `Worker` implements `Clone`, then you can use [`CloneQueen`] -//! implicitly by calling [`Builder::build_with`](crate::hive::Builder::build_with). +//! implicitly by calling [`OpenBuilder::with_worker`](crate::hive::OpenBuilder::with_worker). //! //! A `Queen` should never panic when creating `Worker`s. //! -//! # Implementation Notes -//! -//! It is easiest to use the [`prelude`] when implementing your bees: -//! -//! ``` -//! use beekeeper::bee::prelude::*; -//! ``` -//! -//! Note that both `Queen::create()` and `Worker::apply()` receive `&mut self`, meaning that they -//! can modify their own state. -//! //! The state of a `Hive`'s `Queen` may be interrogated either //! [during](crate::hive::Hive::queen) or [after](crate::hive::Hive::try_into_husk) the //! life of the `Hive`. However, `Worker`s may never be accessed directly.
Thus, it is often @@ -115,14 +122,14 @@ mod queen; pub mod stock; mod worker; -pub use context::{Context, TaskId}; -pub use error::{ApplyError, ApplyRefError}; -pub use queen::{CloneQueen, DefaultQueen, Queen}; -pub use worker::{RefWorker, RefWorkerResult, Worker, WorkerError, WorkerResult}; +pub use self::context::{Context, LocalContext, TaskId, TaskMeta}; +pub use self::error::{ApplyError, ApplyRefError}; +pub use self::queen::{CloneQueen, DefaultQueen, Queen, QueenCell, QueenMut}; +pub use self::worker::{RefWorker, RefWorkerResult, Worker, WorkerError, WorkerResult}; pub mod prelude { pub use super::{ - ApplyError, ApplyRefError, Context, Queen, RefWorker, RefWorkerResult, Worker, WorkerError, - WorkerResult, + ApplyError, ApplyRefError, Context, Queen, QueenCell, QueenMut, RefWorker, RefWorkerResult, + Worker, WorkerError, WorkerResult, }; } diff --git a/src/bee/queen.rs b/src/bee/queen.rs index aa4ab97..c0c93c8 100644 --- a/src/bee/queen.rs +++ b/src/bee/queen.rs @@ -1,16 +1,86 @@ //! The Queen bee trait. use super::Worker; +use derive_more::Debug; +use parking_lot::RwLock; use std::marker::PhantomData; +use std::ops::Deref; +use std::{any, fmt}; -/// A trait for stateful factories that create `Worker`s. +/// A trait for factories that create `Worker`s. pub trait Queen: Send + Sync + 'static { /// The kind of `Worker` created by this factory. type Kind: Worker; - /// Returns a new instance of `Self::Kind`. + /// Creates and returns a new instance of `Self::Kind`, *immutably*. + fn create(&self) -> Self::Kind; +} + +/// A trait for mutable factories that create `Worker`s. +pub trait QueenMut: Send + Sync + 'static { + /// The kind of `Worker` created by this factory. + type Kind: Worker; + + /// Creates and returns a new instance of `Self::Kind`, *mutably*. fn create(&mut self) -> Self::Kind; } +/// A wrapper for a `QueenMut` that implements `Queen`. +/// +/// Interior mutability is enabled using an `RwLock`. +pub struct QueenCell<Q: QueenMut>(RwLock<Q>); + +impl<Q: QueenMut> QueenCell<Q> { + /// Creates a new `QueenCell` with the given `mut_queen`. + pub fn new(mut_queen: Q) -> Self { + Self(RwLock::new(mut_queen)) + } + + /// Returns a reference to the wrapped `Queen`. + pub fn get(&self) -> impl Deref<Target = Q> + '_ { + self.0.read() + } + + /// Consumes this `QueenCell` and returns the inner `Queen`. + pub fn into_inner(self) -> Q { + self.0.into_inner() + } +} + +impl<Q: QueenMut> Queen for QueenCell<Q> { + type Kind = Q::Kind; + + /// Calls the wrapped `QueenMut::create` method using interior mutability. + fn create(&self) -> Self::Kind { + self.0.write().create() + } +} + +impl<Q: QueenMut + fmt::Debug> fmt::Debug for QueenCell<Q> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("QueenCell") + .field("queen", &*self.0.read()) + .finish() + } +} + +impl<Q: QueenMut + Clone> Clone for QueenCell<Q> { + fn clone(&self) -> Self { + Self(RwLock::new(self.0.read().clone())) + } +} + +impl<Q: QueenMut + Default> Default for QueenCell<Q> { + fn default() -> Self { + Self::new(Q::default()) + } +} + +impl<Q: QueenMut> From<Q> for QueenCell<Q> { + fn from(queen: Q) -> Self { + Self::new(queen) + } +} + /// A `Queen` that can create a `Worker` type that implements `Default`. /// /// Note that, for the implementation to be generic, `W` also needs to be `Send` and `Sync`.
If you @@ -28,7 +98,7 @@ pub trait Queen: Send + Sync + 'static { /// type Output = u8; /// type Error = (); /// -/// fn apply(&mut self, input: u8, _: &Context) -> WorkerResult<Self> { +/// fn apply(&mut self, input: u8, _: &Context<u8>) -> WorkerResult<Self> { /// Ok(self.0.saturating_add(input)) /// } /// } @@ -38,18 +108,25 @@ pub trait Queen: Send + Sync + 'static { /// impl Queen for MyQueen { /// type Kind = MyWorker; /// -/// fn create(&mut self) -> Self::Kind { +/// fn create(&self) -> Self::Kind { /// MyWorker::default() /// } /// } /// ``` #[derive(Default, Debug)] +#[debug("DefaultQueen<{}>", any::type_name::<W>())] pub struct DefaultQueen<W>(PhantomData<W>); +impl<W> Clone for DefaultQueen<W> { + fn clone(&self) -> Self { + Self::default() + } +} + impl<W: Worker + Send + Sync + Default> Queen for DefaultQueen<W> { type Kind = W; - fn create(&mut self) -> Self::Kind { + fn create(&self) -> Self::Kind { Self::Kind::default() } } @@ -57,6 +134,7 @@ impl<W: Worker + Send + Sync + Default> Queen for DefaultQueen<W> { /// A `Queen` that can create a `Worker` type that implements `Clone`, by making copies of /// an existing instance of that `Worker` type. #[derive(Debug)] +#[debug("CloneQueen<{}>", any::type_name::<W>())] pub struct CloneQueen<W>(W); impl<W: Worker + Clone> CloneQueen<W> { @@ -65,10 +143,106 @@ } } +impl<W: Worker + Clone> Clone for CloneQueen<W> { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl<W: Worker + Clone + Default> Default for CloneQueen<W> { + fn default() -> Self { + Self(W::default()) + } +} + impl<W: Worker + Send + Sync + Clone> Queen for CloneQueen<W> { type Kind = W; - fn create(&mut self) -> Self::Kind { + fn create(&self) -> Self::Kind { self.0.clone() } } + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::{CloneQueen, DefaultQueen, Queen, QueenCell, QueenMut}; + use crate::bee::stock::EchoWorker; + + #[derive(Default, Debug, Clone)] + struct TestQueen(usize); + + impl QueenMut for TestQueen { + type Kind = EchoWorker<usize>; + + fn create(&mut self) -> Self::Kind { + self.0 += 1; + EchoWorker::default() + } + } + + #[test] + fn test_queen_cell() { + let queen = QueenCell::new(TestQueen(0)); + for _ in 0..10 { + let _worker = queen.create(); + } + assert_eq!(queen.get().0, 10); + assert_eq!(queen.into_inner().0, 10); + } + + #[test] + fn test_queen_cell_default() { + let queen = QueenCell::<TestQueen>::default(); + for _ in 0..10 { + let _worker = queen.create(); + } + assert_eq!(queen.get().0, 10); + } + + #[test] + fn test_queen_cell_clone() { + let queen = QueenCell::<TestQueen>::default(); + for _ in 0..10 { + let _worker = queen.create(); + } + assert_eq!(queen.clone().get().0, 10); + } + + #[test] + fn test_queen_cell_debug() { + let queen = QueenCell::<TestQueen>::default(); + for _ in 0..10 { + let _worker = queen.create(); + } + assert_eq!(format!("{:?}", queen), "QueenCell { queen: TestQueen(10) }"); + } + + #[test] + fn test_queen_cell_from() { + let queen = QueenCell::from(TestQueen::default()); + for _ in 0..10 { + let _worker = queen.create(); + } + assert_eq!(queen.get().0, 10); + } + + #[test] + fn test_default_queen() { + let queen1 = DefaultQueen::<EchoWorker<usize>>::default(); + let worker1 = queen1.create(); + let queen2 = queen1.clone(); + let worker2 = queen2.create(); + assert_eq!(worker1, worker2); + } + + #[test] + fn test_clone_queen() { + let worker = EchoWorker::<usize>::default(); + let queen = CloneQueen::new(worker); + let worker1 = queen.create(); + let queen2 = queen.clone(); + let worker2 = queen2.create(); + assert_eq!(worker1, worker2); + } +} diff --git a/src/bee/stock/call.rs b/src/bee/stock/call.rs index 93bf057..54e9c00 100644 --- a/src/bee/stock/call.rs +++ b/src/bee/stock/call.rs @@ -2,15 +2,27 @@ use crate::bee::{
ApplyError, ApplyRefError, Context, RefWorker, RefWorkerResult, Worker, WorkerResult, }; -use std::fmt::Debug; +use derive_more::Debug; use std::marker::PhantomData; +use std::ops::{Deref, DerefMut}; +use std::{any, fmt}; /// Wraps a closure or function pointer and calls it when applied. For this `Callable` to be /// useable by a `Worker`, the function must be `FnMut` *and* `Clone`able. +/// +/// TODO: we could provide a better `Debug` implementation by providing a macro that can wrap a +/// closure and store the text of the function, and then change all the Workers to take a +/// `F: Deref`. +/// See https://users.rust-lang.org/t/is-it-possible-to-implement-debug-for-fn-type/14824/3 +#[derive(Debug)] struct Callable<I, O, E, F> { + #[debug(skip)] f: F, + #[debug("{}", any::type_name::<I>())] i: PhantomData<I>, + #[debug("{}", any::type_name::<O>())] o: PhantomData<O>, + #[debug("{}", any::type_name::<E>())] e: PhantomData<E>, } @@ -23,6 +35,10 @@ impl<I, O, E, F> Callable<I, O, E, F> { e: PhantomData, } } + + fn into_inner(self) -> F { + self.f + } } impl<I, O, E, F: Clone> Clone for Callable<I, O, E, F> { @@ -31,18 +47,52 @@ impl<I, O, E, F: Clone> Clone for Callable<I, O, E, F> { } } +impl<I, O, E, F> Deref for Callable<I, O, E, F> { + type Target = F; + + fn deref(&self) -> &Self::Target { + &self.f + } +} + +impl<I, O, E, F> DerefMut for Callable<I, O, E, F> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.f + } +} + /// A `Caller` that executes its function once on the input and returns the output. The function /// should not panic. -pub struct Caller<I, O, F>(Callable<I, O, (), F>); +#[derive(Debug)] +pub struct Caller<I, O, F> { + callable: Callable<I, O, (), F>, +} impl<I, O, F> Caller<I, O, F> { - pub fn of(f: F) -> Self - where - I: Send + Sync + 'static, - O: Send + Sync + 'static, - F: FnMut(I) -> O + Clone + 'static, - { - Caller(Callable::of(f)) + /// Returns the wrapped callable. + pub fn into_inner(self) -> F { + self.callable.into_inner() + } +} + +impl<I, O, F> From<F> for Caller<I, O, F> +where + I: Send + Sync + 'static, + O: Send + Sync + 'static, + F: FnMut(I) -> O + Clone + 'static, +{ + fn from(f: F) -> Self { + Caller { + callable: Callable::of(f), + } + } +} + +impl<I, O, F: Clone> Clone for Caller<I, O, F> { + fn clone(&self) -> Self { + Self { + callable: self.callable.clone(), + } } } @@ -57,44 +107,46 @@ where type Error = (); #[inline] - fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult<Self> { - Ok((self.0.f)(input)) + fn apply(&mut self, input: Self::Input, _: &Context<Self::Input>) -> WorkerResult<Self> { + Ok((self.callable)(input)) } } -impl<I, O, F: FnMut(I) -> O> Debug for Caller<I, O, F> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("Caller") - } +/// A `Caller` that executes its function once on each input. The input value is consumed by the +/// function. If the function returns an error, it is wrapped in `ApplyError::Fatal`. +/// +/// If ownership of the input value is not required, consider using `RefCaller` instead. +#[derive(Debug)] +pub struct OnceCaller<I, O, E, F> { + callable: Callable<I, O, E, F>, } -impl<I, O, F: Clone> Clone for Caller<I, O, F> { - fn clone(&self) -> Self { - Self(self.0.clone()) +impl<I, O, E, F> OnceCaller<I, O, E, F> { + /// Returns the wrapped callable. + pub fn into_inner(self) -> F { + self.callable.into_inner() + } } -impl<I, O, F: FnMut(I) -> O + Clone + 'static> From<F> for Caller<I, O, F> { +impl<I, O, E, F> From<F> for OnceCaller<I, O, E, F> +where + I: Send + Sync + 'static, + O: Send + Sync + 'static, + E: Send + Sync + fmt::Debug + 'static, + F: FnMut(I) -> Result<O, E> + Clone + 'static, +{ fn from(f: F) -> Self { - Caller(Callable::of(f)) + OnceCaller { + callable: Callable::of(f), + } } } -/// A `Caller` that executes its function once on each input. The input value is consumed by the -/// function. If the function returns an error, it is wrapped in `ApplyError::Fatal`.
-/// -/// If ownership of the input value is not required, consider using `RefCaller` instead. -pub struct OnceCaller<I, O, E, F>(Callable<I, O, E, F>); - -impl<I, O, E, F> OnceCaller<I, O, E, F> { - pub fn of(f: F) -> Self - where - I: Send + Sync + 'static, - O: Send + Sync + 'static, - E: Send + Sync + Debug + 'static, - F: FnMut(I) -> Result<O, E> + Clone + 'static, - { - OnceCaller(Callable::of(f)) +impl<I, O, E, F: Clone> Clone for OnceCaller<I, O, E, F> { + fn clone(&self) -> Self { + Self { + callable: self.callable.clone(), + } } } @@ -102,7 +154,7 @@ impl<I, O, E, F> Worker for OnceCaller<I, O, E, F> where I: Send + 'static, O: Send + 'static, - E: Send + Debug + 'static, + E: Send + fmt::Debug + 'static, F: FnMut(I) -> Result<O, E> + Clone + 'static, { type Input = I; @@ -110,48 +162,47 @@ where type Error = E; #[inline] - fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult<Self> { - (self.0.f)(input).map_err(|error| ApplyError::Fatal { error, input: None }) + fn apply(&mut self, input: Self::Input, _: &Context<Self::Input>) -> WorkerResult<Self> { + (self.callable)(input).map_err(|error| ApplyError::Fatal { error, input: None }) } } -impl<I, O, E, F: FnMut(I) -> Result<O, E>> Debug for OnceCaller<I, O, E, F> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("OnceCaller") - } +/// A `Caller` that executes its function once on a reference to the input. If the function +/// returns an error, it is wrapped in `ApplyError::Fatal`. +/// +/// The benefit of using `RefCaller` over `OnceCaller` is that the `Fatal` error +/// contains the input value for later recovery. +#[derive(Debug)] +pub struct RefCaller<I, O, E, F> { + callable: Callable<I, O, E, F>, } -impl<I, O, E, F: Clone> Clone for OnceCaller<I, O, E, F> { - fn clone(&self) -> Self { - Self(self.0.clone()) +impl<I, O, E, F> RefCaller<I, O, E, F> { + /// Returns the wrapped callable. + pub fn into_inner(self) -> F { + self.callable.into_inner() + } } -impl<I, O, E, F> From<F> for OnceCaller<I, O, E, F> +impl<I, O, E, F> From<F> for RefCaller<I, O, E, F> where - F: FnMut(I) -> Result<O, E> + Clone + 'static, + I: Send + Sync + 'static, + O: Send + Sync + 'static, + E: Send + Sync + fmt::Debug + 'static, + F: FnMut(&I) -> Result<O, E> + Clone + 'static, { fn from(f: F) -> Self { - OnceCaller(Callable::of(f)) + RefCaller { + callable: Callable::of(f), + } } } -/// A `Caller` that executes its function once on a reference to the input. If the function -/// returns an error, it is wrapped in `ApplyError::Fatal`. -/// -/// The benefit of using `RefCaller` over `OnceCaller` is that the `Fatal` error -/// contains the input value for later recovery. -pub struct RefCaller<I, O, E, F>(Callable<I, O, E, F>); - -impl<I, O, E, F> RefCaller<I, O, E, F> { - pub fn of(f: F) -> Self - where - I: Send + Sync + 'static, - O: Send + Sync + 'static, - E: Send + Sync + Debug + 'static, - F: FnMut(&I) -> Result<O, E> + Clone + 'static, - { - RefCaller(Callable::of(f)) +impl<I, O, E, F: Clone> Clone for RefCaller<I, O, E, F> { + fn clone(&self) -> Self { + Self { + callable: self.callable.clone(), + } } } @@ -159,7 +210,7 @@ impl<I, O, E, F> RefWorker for RefCaller<I, O, E, F> where I: Send + 'static, O: Send + 'static, - E: Send + Debug + 'static, + E: Send + fmt::Debug + 'static, F: FnMut(&I) -> Result<O, E> + Clone + 'static, { type Input = I; @@ -167,45 +218,48 @@ where type Error = E; #[inline] - fn apply_ref(&mut self, input: &Self::Input, _: &Context) -> RefWorkerResult<Self> { - (self.0.f)(input).map_err(|error| ApplyRefError::Fatal(error)) + fn apply_ref( + &mut self, + input: &Self::Input, + _: &Context<Self::Input>, + ) -> RefWorkerResult<Self> { + (self.callable)(input).map_err(|error| ApplyRefError::Fatal(error)) } } -impl<I, O, E, F: FnMut(&I) -> Result<O, E>> Debug for RefCaller<I, O, E, F> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("RefCaller") - } +/// A `Caller` that returns a `Result`.
A result of `Err(ApplyError::Retryable)` +/// can be returned to indicate the task should be retried. +#[derive(Debug)] +pub struct RetryCaller<I, O, E, F> { + callable: Callable<I, O, E, F>, } -impl<I, O, E, F: Clone> Clone for RefCaller<I, O, E, F> { - fn clone(&self) -> Self { - Self(self.0.clone()) +impl<I, O, E, F> RetryCaller<I, O, E, F> { + /// Returns the wrapped callable. + pub fn into_inner(self) -> F { + self.callable.into_inner() + } } -impl<I, O, E, F> From<F> for RefCaller<I, O, E, F> +impl<I, O, E, F> From<F> for RetryCaller<I, O, E, F> where - F: FnMut(&I) -> Result<O, E> + Clone + 'static, + I: Send + Sync + 'static, + O: Send + Sync + 'static, + E: Send + Sync + fmt::Debug + 'static, + F: FnMut(I, &Context<I>) -> Result<O, ApplyError<I, E>> + Clone + 'static, { fn from(f: F) -> Self { - RefCaller(Callable::of(f)) + RetryCaller { + callable: Callable::of(f), + } } } -/// A `Caller` that returns a `Result`. A result of `Err(ApplyError::Retryable)` -/// can be returned to indicate the task should be retried. -pub struct RetryCaller<I, O, E, F>(Callable<I, O, E, F>); - -impl<I, O, E, F> RetryCaller<I, O, E, F> { - pub fn of(f: F) -> Self - where - I: Send + Sync + 'static, - O: Send + Sync + 'static, - E: Send + Sync + Debug + 'static, - F: FnMut(I, &Context) -> Result<O, ApplyError<I, E>> + Clone + 'static, - { - RetryCaller(Callable::of(f)) +impl<I, O, E, F: Clone> Clone for RetryCaller<I, O, E, F> { + fn clone(&self) -> Self { + Self { + callable: self.callable.clone(), + } } } @@ -213,61 +267,49 @@ impl<I, O, E, F> Worker for RetryCaller<I, O, E, F> where I: Send + 'static, O: Send + 'static, - E: Send + Debug + 'static, - F: FnMut(I, &Context) -> Result<O, ApplyError<I, E>> + Clone + 'static, + E: Send + fmt::Debug + 'static, + F: FnMut(I, &Context<I>) -> Result<O, ApplyError<I, E>> + Clone + 'static, { type Input = I; type Output = O; type Error = E; #[inline] - fn apply(&mut self, input: Self::Input, ctx: &Context) -> WorkerResult<Self> { - (self.0.f)(input, ctx) - } -} - -impl<I, O, E, F: Clone> Clone for RetryCaller<I, O, E, F> { - fn clone(&self) -> Self { - Self(self.0.clone()) - } -} - -impl<I, O, E, F: FnMut(I, &Context) -> Result<O, ApplyError<I, E>>> Debug - for RetryCaller<I, O, E, F> -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("RetryCaller") - } -} - -impl<I, O, E, F> From<F> for RetryCaller<I, O, E, F> -where - F: FnMut(I, &Context) -> Result<O, ApplyError<I, E>> + Clone + 'static, -{ - fn from(f: F) -> Self { - RetryCaller(Callable::of(f)) + fn apply(&mut self, input: Self::Input, ctx: &Context<Self::Input>) -> WorkerResult<Self> { + (self.callable)(input, ctx) } } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::*; use crate::bee::Context; #[test] fn test_call() { - let mut worker = Caller::of(|input: u8| input + 1); + let mut worker = Caller::from(|input: u8| input + 1); assert!(matches!(worker.apply(5, &Context::empty()), Ok(6))) } + #[test] + fn test_clone() { + let worker1 = Caller::from(|input: u8| input + 1); + let worker2 = worker1.clone(); + let f = worker2.into_inner(); + assert_eq!(f(5), 6); + } + #[allow(clippy::type_complexity)] fn try_caller() -> RetryCaller< (bool, u8), u8, String, - impl FnMut((bool, u8), &Context) -> Result<u8, ApplyError<(bool, u8), String>> + Clone + 'static, + impl FnMut((bool, u8), &Context<(bool, u8)>) -> Result<u8, ApplyError<(bool, u8), String>> + + Clone + + 'static, > { - RetryCaller::of(|input: (bool, u8), _: &Context| { + RetryCaller::from(|input: (bool, u8), _: &Context<(bool, u8)>| { if input.0 { Ok(input.1 + 1) } else { @@ -285,6 +327,14 @@ mod tests { assert!(matches!(worker.apply((true, 5), &Context::empty()), Ok(6))); } + #[test] + fn test_clone_retry_caller() { + let worker1 = try_caller(); + let worker2 = worker1.clone(); + let mut f = worker2.into_inner(); + assert!(matches!(f((true, 5), &Context::empty()), Ok(6))); + } + #[test] fn test_try_call_fail() { let mut worker = try_caller(); @@ -306,7 +356,7 @@ mod tests { String, impl FnMut((bool, u8)) -> Result<u8, String> + Clone + 'static, > { -
OnceCaller::of(|input: (bool, u8)| { + OnceCaller::from(|input: (bool, u8)| { if input.0 { Ok(input.1 + 1) } else { @@ -321,6 +371,14 @@ mod tests { assert!(matches!(worker.apply((true, 5), &Context::empty()), Ok(6))); } + #[test] + fn test_clone_once_caller() { + let worker1 = once_caller(); + let worker2 = worker1.clone(); + let mut f = worker2.into_inner(); + assert!(matches!(f((true, 5)), Ok(6))); + } + #[test] fn test_once_call_fail() { let mut worker = once_caller(); @@ -342,7 +400,7 @@ mod tests { String, impl FnMut(&(bool, u8)) -> Result + Clone + 'static, > { - RefCaller::of(|input: &(bool, u8)| { + RefCaller::from(|input: &(bool, u8)| { if input.0 { Ok(input.1 + 1) } else { @@ -357,6 +415,14 @@ mod tests { assert!(matches!(worker.apply((true, 5), &Context::empty()), Ok(6))); } + #[test] + fn test_clone_ref_caller() { + let worker1 = ref_caller(); + let worker2 = worker1.clone(); + let mut f = worker2.into_inner(); + assert!(matches!(f(&(true, 5)), Ok(6))); + } + #[test] fn test_ref_call_fail() { let mut worker = ref_caller(); diff --git a/src/bee/stock/echo.rs b/src/bee/stock/echo.rs index 4aa39c2..f3f9c59 100644 --- a/src/bee/stock/echo.rs +++ b/src/bee/stock/echo.rs @@ -1,29 +1,26 @@ use crate::bee::{Context, Worker, WorkerResult}; -use std::fmt::Debug; +use derive_more::Debug; use std::marker::PhantomData; +use std::{any, fmt}; /// A `Worker` that simply returns the input. -#[derive(Debug)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +#[debug("EchoWorker<{}>", any::type_name::())] pub struct EchoWorker(PhantomData); -impl Default for EchoWorker { - fn default() -> Self { - EchoWorker(PhantomData) - } -} - -impl Worker for EchoWorker { +impl Worker for EchoWorker { type Input = T; type Output = T; type Error = (); #[inline] - fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { + fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { Ok(input) } } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::*; use crate::bee::Context; diff --git a/src/bee/stock/thunk.rs b/src/bee/stock/thunk.rs index 32ed9d8..d8f90c6 100644 --- a/src/bee/stock/thunk.rs +++ b/src/bee/stock/thunk.rs @@ -1,11 +1,13 @@ use crate::bee::{ApplyError, Context, Worker, WorkerResult}; use crate::boxed::BoxedFnOnce; use crate::panic::Panic; -use std::fmt::Debug; +use derive_more::Debug; use std::marker::PhantomData; +use std::{any, fmt}; /// A `Worker` that executes infallible `Thunk`s when applied. #[derive(Debug)] +#[debug("ThunkWorker<{}>", any::type_name::())] pub struct ThunkWorker(PhantomData); impl Default for ThunkWorker { @@ -14,19 +16,26 @@ impl Default for ThunkWorker { } } -impl Worker for ThunkWorker { +impl Clone for ThunkWorker { + fn clone(&self) -> Self { + Self::default() + } +} + +impl Worker for ThunkWorker { type Input = Thunk; type Output = T; type Error = (); #[inline] - fn apply(&mut self, f: Self::Input, _: &Context) -> WorkerResult { + fn apply(&mut self, f: Self::Input, _: &Context) -> WorkerResult { Ok(f.0.call_box()) } } /// A `Worker` that executes fallible `Thunk>`s when applied. 
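The tests above exercise the conversion-based constructors that replace the removed `::of` methods. For reference, a minimal standalone sketch of the pattern (`Context::empty()` is taken from the test helpers above; the error type only needs to satisfy the `Send + Sync + Debug` bounds shown in the `From` impls):

```rust
use beekeeper::bee::stock::{Caller, OnceCaller};
use beekeeper::bee::{Context, Worker};

fn main() {
    // Stock callers are now built via `From`/`Into` rather than `::of`.
    let mut plus_one = Caller::from(|x: u8| x + 1);
    assert!(matches!(plus_one.apply(5, &Context::empty()), Ok(6)));

    // `OnceCaller` wraps a fallible closure; an `Err` surfaces as
    // `ApplyError::Fatal` with `input: None`, since the input was consumed.
    let mut checked = OnceCaller::from(|x: u8| x.checked_add(1).ok_or("overflow"));
    assert!(checked.apply(255, &Context::empty()).is_err());
}
```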
#[derive(Debug)] +#[debug("FunkWorker<{}, {}>", any::type_name::(), any::type_name::())] pub struct FunkWorker(PhantomData, PhantomData); impl Default for FunkWorker { @@ -35,13 +44,23 @@ impl Default for FunkWorker { } } -impl Worker for FunkWorker { +impl Clone for FunkWorker { + fn clone(&self) -> Self { + Self::default() + } +} + +impl Worker for FunkWorker +where + T: Send + fmt::Debug + 'static, + E: Send + fmt::Debug + 'static, +{ type Input = Thunk>; type Output = T; type Error = E; #[inline] - fn apply(&mut self, f: Self::Input, _: &Context) -> WorkerResult { + fn apply(&mut self, f: Self::Input, _: &Context) -> WorkerResult { f.0.call_box() .map_err(|error| ApplyError::Fatal { error, input: None }) } @@ -50,20 +69,27 @@ impl Worker for FunkWorker /// A `Worker` that executes `Thunk`s that may panic. A panic is caught and returned as an /// `ApplyError::Panic` error. #[derive(Debug)] +#[debug("PunkWorker<{}>", any::type_name::())] pub struct PunkWorker(PhantomData); impl Default for PunkWorker { fn default() -> Self { - PunkWorker(PhantomData) + Self(PhantomData) } } -impl Worker for PunkWorker { +impl Clone for PunkWorker { + fn clone(&self) -> Self { + Self::default() + } +} + +impl Worker for PunkWorker { type Input = Thunk; type Output = T; type Error = (); - fn apply(&mut self, f: Self::Input, _: &Context) -> WorkerResult { + fn apply(&mut self, f: Self::Input, _: &Context) -> WorkerResult { Panic::try_call_boxed(None, f.0).map_err(|payload| ApplyError::Panic { input: None, payload, @@ -72,10 +98,12 @@ impl Worker for PunkWorker { } /// A wrapper around a closure that can be executed exactly once by a worker in a `Hive`. +#[derive(Debug)] +#[debug("Thunk<{}>", any::type_name::())] pub struct Thunk(Box + Send>); -impl Thunk { - pub fn of T + Send + 'static>(f: F) -> Self { +impl T + Send + 'static> From for Thunk { + fn from(f: F) -> Self { Self(Box::new(f)) } } @@ -86,13 +114,8 @@ impl Thunk> { } } -impl Debug for Thunk { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("Thunk") - } -} - #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::*; use crate::bee::Context; @@ -100,7 +123,7 @@ mod tests { #[test] fn test_thunk() { let mut worker = ThunkWorker::::default(); - let thunk = Thunk::of(|| 5); + let thunk = Thunk::from(|| 5); assert_eq!(5, worker.apply(thunk, &Context::empty()).unwrap()); } diff --git a/src/bee/worker.rs b/src/bee/worker.rs index 177cefa..6fc3902 100644 --- a/src/bee/worker.rs +++ b/src/bee/worker.rs @@ -32,7 +32,7 @@ pub trait Worker: Debug + Sized + 'static { /// /// This method should not panic. If it may panic, then [`Panic::try_call`] should be used to /// catch the panic and turn it into an [`ApplyError::Panic`] error. - fn apply(&mut self, _: Self::Input, _: &Context) -> WorkerResult; + fn apply(&mut self, _: Self::Input, _: &Context) -> WorkerResult; /// Applies this `Worker`'s function sequentially to an iterator of inputs and returns a /// iterator over the outputs. @@ -65,7 +65,7 @@ pub trait RefWorker: Debug + Sized + 'static { /// The type of error produced by this function. 
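The thunk types follow the same conversion pattern: the `Thunk::of` constructor is replaced by a blanket `From` impl for closures. A short sketch based on the `test_thunk` case shown above:

```rust
use beekeeper::bee::stock::{Thunk, ThunkWorker};
use beekeeper::bee::{Context, Worker};

fn main() {
    let mut worker = ThunkWorker::<i32>::default();
    // Any `FnOnce() -> T + Send + 'static` closure converts into a `Thunk<T>`.
    let thunk = Thunk::from(|| 21 * 2);
    assert_eq!(42, worker.apply(thunk, &Context::empty()).unwrap());
}
```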
type Error: Send + Debug; - fn apply_ref(&mut self, _: &Self::Input, _: &Context) -> RefWorkerResult; + fn apply_ref(&mut self, _: &Self::Input, _: &Context) -> RefWorkerResult; } /// Blanket implementation of `Worker` for `RefWorker` that calls `apply_ref` and catches any @@ -81,7 +81,7 @@ where type Output = O; type Error = E; - fn apply(&mut self, input: Self::Input, ctx: &Context) -> WorkerResult { + fn apply(&mut self, input: Self::Input, ctx: &Context) -> WorkerResult { match Panic::try_call(None, || self.apply_ref(&input, ctx)) { Ok(Ok(output)) => Ok(output), Ok(Err(error)) => Err(error.into_apply_error(input)), @@ -94,6 +94,7 @@ where } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::{ApplyRefError, RefWorker, RefWorkerResult, Worker, WorkerResult}; use crate::bee::{ApplyError, Context}; @@ -106,7 +107,7 @@ mod tests { type Output = u8; type Error = (); - fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { + fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { Ok(input + 1) } } @@ -133,7 +134,11 @@ mod tests { type Output = u8; type Error = (); - fn apply_ref(&mut self, input: &Self::Input, _: &Context) -> RefWorkerResult { + fn apply_ref( + &mut self, + input: &Self::Input, + _: &Context, + ) -> RefWorkerResult { match *input { 0 => Err(ApplyRefError::Retryable(())), 1 => Err(ApplyRefError::Fatal(())), diff --git a/src/channel.rs b/src/channel.rs index 0d195e1..b646767 100644 --- a/src/channel.rs +++ b/src/channel.rs @@ -2,12 +2,16 @@ //! //! A maximum one of the channel feature may be enabled. If no channel feature is enabled, then //! `std::sync::mpsc` will be used. +use derive_more::Debug; pub use prelude::channel; pub(crate) use prelude::*; +use std::any; /// Possible results of calling `ReceiverExt::try_recv_msg()` on a `Receiver`. +#[derive(Debug)] pub enum Message { /// A message was successfully received from the channel. + #[debug("Received: {}", any::type_name::())] Received(T), /// The channel was disconnected. 
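Since `Context` is now generic over the worker's input type, downstream `Worker` implementations must name `Context<Self::Input>` in their signatures. A sketch of the minimal change (the `WorkerResult<Self>` alias form is an assumption reconstructed from the surrounding signatures):

```rust
use beekeeper::bee::{Context, Worker, WorkerResult};

#[derive(Debug)]
struct Doubler;

impl Worker for Doubler {
    type Input = u32;
    type Output = u32;
    type Error = ();

    // Before 0.3 this took a bare `&Context`; the context parameter is now
    // typed by the input so that tasks can be resubmitted with the right type.
    fn apply(&mut self, input: u32, _ctx: &Context<u32>) -> WorkerResult<Self> {
        Ok(input * 2)
    }
}
```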
ChannelDisconnected, @@ -30,7 +34,7 @@ pub trait ReceiverExt { #[cfg(not(any(feature = "crossbeam", feature = "flume", feature = "loole")))] pub mod prelude { - pub use std::sync::mpsc::{channel, Receiver, SendError, Sender}; + pub use std::sync::mpsc::{Receiver, SendError, Sender, channel}; use super::{Message, ReceiverExt, SenderExt}; use std::sync::mpsc::TryRecvError; @@ -57,7 +61,7 @@ pub mod prelude { #[cfg(all(feature = "crossbeam", not(any(feature = "flume", feature = "loole"))))] pub mod prelude { - pub use crossbeam_channel::{unbounded as channel, Receiver, SendError, Sender}; + pub use crossbeam_channel::{Receiver, SendError, Sender, unbounded as channel}; use super::{Message, ReceiverExt, SenderExt}; use crossbeam_channel::TryRecvError; @@ -84,7 +88,7 @@ pub mod prelude { #[cfg(all(feature = "flume", not(any(feature = "crossbeam", feature = "loole"))))] pub mod prelude { - pub use flume::{unbounded as channel, Receiver, SendError, Sender}; + pub use flume::{Receiver, SendError, Sender, unbounded as channel}; use super::{Message, ReceiverExt, SenderExt}; use flume::TryRecvError; @@ -111,7 +115,7 @@ pub mod prelude { #[cfg(all(feature = "loole", not(any(feature = "crossbeam", feature = "flume"))))] pub mod prelude { - pub use loole::{unbounded as channel, Receiver, SendError, Sender}; + pub use loole::{Receiver, SendError, Sender, unbounded as channel}; use super::{Message, ReceiverExt, SenderExt}; use loole::TryRecvError; diff --git a/src/hive/builder.rs b/src/hive/builder.rs deleted file mode 100644 index 5532e3e..0000000 --- a/src/hive/builder.rs +++ /dev/null @@ -1,613 +0,0 @@ -use super::{Config, Hive}; -use crate::bee::{CloneQueen, DefaultQueen, Queen, Worker}; - -/// A `Builder` for a [`Hive`](crate::hive::Hive). -/// -/// Calling [`Builder::new()`] creates an unconfigured `Builder`, while calling -/// [`Builder::default()`] creates a `Builder` with fields pre-set to the global default values. -/// Global defaults can be changed using the -/// [`beekeeper::hive::set_*_default`](crate::hive#functions) functions. -/// -/// The configuration options available: -/// * [`Builder::num_threads`]: number of worker threads that will be spawned by the built `Hive`. -/// * [`Builder::with_default_num_threads`] will set `num_threads` to the global default value. -/// * [`Builder::with_thread_per_core`] will set `num_threads` to the number of available CPU -/// cores. -/// * [`Builder::thread_name`]: thread name for each of the threads spawned by the built `Hive`. By -/// default, threads are unnamed. -/// * [`Builder::thread_stack_size`]: stack size (in bytes) for each of the threads spawned by the -/// built `Hive`. See the -/// [`std::thread`](https://doc.rust-lang.org/stable/std/thread/index.html#stack-size) -/// documentation for details on the default stack size. -/// -/// The following configuration options are available when the `retry` feature is enabled: -/// * [`Builder::max_retries`]: maximum number of times a `Worker` will retry an -/// [`ApplyError::Retryable`](crate::bee::ApplyError#Retryable) before giving up. -/// * [`Builder::retry_factor`]: [`Duration`](std::time::Duration) factor for exponential backoff -/// when retrying an `ApplyError::Retryable` error. -/// * [`Builder::with_default_retries`] sets the retry options to the global defaults, while -/// [`Builder::with_no_retries`] disabled retrying. 
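For orientation, a hedged sketch of how the `Message`-returning receiver extension above is meant to be consumed, regardless of which channel backend feature is active (this assumes the `channel` module and its `try_recv_msg` extension are reachable from the crate root, as the re-exports suggest):

```rust
use beekeeper::channel::{channel, Message, ReceiverExt};

fn main() {
    let (tx, rx) = channel::<u32>();
    tx.send(7).unwrap();
    match rx.try_recv_msg() {
        Message::Received(n) => println!("received {n}"),
        // Disconnection (and any empty-channel case) is folded together here.
        _ => println!("no message available"),
    }
}
```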
-/// -/// The following configuration options are available when the `affinity` feature is enabled: -/// * [`Builder::core_affinity`]: List of CPU core indices to which the threads should be pinned. -/// * [`Builder::with_default_core_affinity`] will set the list to all CPU core indices, though -/// only the first `num_threads` indices will be used. -/// -/// To create the [`Hive`], call one of the `build*` methods: -/// * [`Builder::build`] requires a [`Queen`] instance. -/// * [`Builder::build_default`] requires a [`Queen`] type that implements [`Default`]. -/// * [`Builder::build_with`] requires a [`Worker`] instance that implements [`Clone`]. -/// * [`Builder::build_with_default`] requires a [`Worker`] type that implements [`Default`]. -/// -/// # Examples -/// -/// Build a [`Hive`] that uses a maximum of eight threads simultaneously and each thread has -/// a 8 MB stack size: -/// -/// ``` -/// type MyWorker = beekeeper::bee::stock::ThunkWorker<()>; -/// -/// let hive = beekeeper::hive::Builder::new() -/// .num_threads(8) -/// .thread_stack_size(8_000_000) -/// .build_with_default::(); -/// ``` -#[derive(Clone)] -pub struct Builder(Config); - -impl Builder { - /// Returns a new `Builder` with no options configured. - pub fn new() -> Self { - Self(Config::empty()) - } - - /// Sets the maximum number of worker threads that will be alive at any given moment in the - /// built [`Hive`]. If not specified, the built `Hive` will not be initialized with worker - /// threads until [`Hive::grow`] is called. - /// - /// # Examples - /// - /// No more than eight threads will be alive simultaneously for this hive: - /// - /// ``` - /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; - /// use beekeeper::hive::{Builder, Hive}; - /// - /// # fn main() { - /// let hive = Builder::new() - /// .num_threads(8) - /// .build_with_default::>(); - /// - /// for _ in 0..100 { - /// hive.apply_store(Thunk::of(|| { - /// println!("Hello from a worker thread!") - /// })); - /// } - /// # } - /// ``` - pub fn num_threads(mut self, num: usize) -> Self { - let _ = self.0.num_threads.set(Some(num)); - self - } - - /// Sets the number of worker threads to the global default value. - pub fn with_default_num_threads(mut self) -> Self { - let _ = self - .0 - .num_threads - .set(super::config::DEFAULTS.lock().num_threads.get()); - self - } - - /// Specifies that the built [`Hive`] will use all available CPU cores for worker threads. - /// - /// # Examples - /// - /// All available threads will be alive simultaneously for this hive: - /// - /// ``` - /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; - /// use beekeeper::hive::{Builder, Hive}; - /// - /// # fn main() { - /// let hive = Builder::new() - /// .with_thread_per_core() - /// .build_with_default::>(); - /// - /// for _ in 0..100 { - /// hive.apply_store(Thunk::of(|| { - /// println!("Hello from a worker thread!") - /// })); - /// } - /// # } - /// ``` - pub fn with_thread_per_core(mut self) -> Self { - let _ = self.0.num_threads.set(Some(num_cpus::get())); - self - } - - /// Sets the thread name for each of the threads spawned by the built [`Hive`]. If not - /// specified, threads spawned by the thread pool will be unnamed. 
- /// - /// # Examples - /// - /// Each thread spawned by this hive will have the name `"foo"`: - /// - /// ``` - /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; - /// use beekeeper::hive::{Builder, Hive}; - /// use std::thread; - /// - /// # fn main() { - /// let hive = Builder::default() - /// .thread_name("foo") - /// .build_with_default::>(); - /// - /// for _ in 0..100 { - /// hive.apply_store(Thunk::of(|| { - /// assert_eq!(thread::current().name(), Some("foo")); - /// })); - /// } - /// # hive.join(); - /// # } - /// ``` - pub fn thread_name>(mut self, name: T) -> Self { - let _ = self.0.thread_name.set(Some(name.into())); - self - } - - /// Sets the stack size (in bytes) for each of the threads spawned by the built [`Hive`]. - /// If not specified, threads spawned by the hive will have a stack size [as specified in - /// the `std::thread` documentation][thread]. - /// - /// [thread]: https://doc.rust-lang.org/nightly/std/thread/index.html#stack-size - /// - /// # Examples - /// - /// Each thread spawned by this hive will have a 4 MB stack: - /// - /// ``` - /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; - /// use beekeeper::hive::{Builder, Hive}; - /// - /// # fn main() { - /// let hive = Builder::default() - /// .thread_stack_size(4_000_000) - /// .build_with_default::>(); - /// - /// for _ in 0..100 { - /// hive.apply_store(Thunk::of(|| { - /// println!("This thread has a 4 MB stack size!"); - /// })); - /// } - /// # hive.join(); - /// # } - /// ``` - pub fn thread_stack_size(mut self, size: usize) -> Self { - let _ = self.0.thread_stack_size.set(Some(size)); - self - } - - /// Consumes this `Builder` and returns a new [`Hive`] using the given [`Queen`] to create - /// [`Worker`]s. - /// - /// Returns an error if there was an error spawning the worker threads. 
- /// - /// # Examples - /// - /// ``` - /// # use beekeeper::hive::{Builder, Hive}; - /// # use beekeeper::bee::{Context, Queen, Worker, WorkerResult}; - /// - /// #[derive(Debug)] - /// struct CounterWorker { - /// index: usize, - /// input_count: usize, - /// input_sum: usize, - /// } - /// - /// impl CounterWorker { - /// fn new(index: usize) -> Self { - /// Self { - /// index, - /// input_count: 0, - /// input_sum: 0, - /// } - /// } - /// } - /// - /// impl Worker for CounterWorker { - /// type Input = usize; - /// type Output = String; - /// type Error = (); - /// - /// fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { - /// self.input_count += 1; - /// self.input_sum += input; - /// let s = format!( - /// "CounterWorker {}: Input {}, Count {}, Sum {}", - /// self.index, input, self.input_count, self.input_sum - /// ); - /// Ok(s) - /// } - /// } - /// - /// #[derive(Debug, Default)] - /// struct CounterQueen { - /// num_workers: usize - /// } - /// - /// impl Queen for CounterQueen { - /// type Kind = CounterWorker; - /// - /// fn create(&mut self) -> Self::Kind { - /// self.num_workers += 1; - /// CounterWorker::new(self.num_workers) - /// } - /// } - /// - /// # fn main() { - /// let hive = Builder::new() - /// .num_threads(8) - /// .thread_stack_size(4_000_000) - /// .build(CounterQueen::default()); - /// - /// for i in 0..100 { - /// hive.apply_store(i); - /// } - /// let husk = hive.try_into_husk().unwrap(); - /// assert_eq!(husk.queen().num_workers, 8); - /// # } - /// ``` - pub fn build(self, queen: Q) -> Hive { - Hive::new(self.0, queen) - } - - /// Consumes this `Builder` and returns a new [`Hive`] using a [`Queen`] created with - /// [`Q::default()`](std::default::Default) to create [`Worker`]s. - /// - /// Returns an error if there was an error spawning the worker threads. - pub fn build_default(self) -> Hive { - Hive::new(self.0, Q::default()) - } - - /// Consumes this `Builder` and returns a new [`Hive`] with [`Worker`]s created by cloning - /// `worker`. - /// - /// Returns an error if there was an error spawning the worker threads. - /// - /// # Examples - /// - /// ``` - /// # use beekeeper::hive::{Builder, OutcomeIteratorExt}; - /// # use beekeeper::bee::{Context, Worker, WorkerResult}; - /// - /// #[derive(Debug, Clone)] - /// struct MathWorker(isize); - /// - /// impl MathWorker { - /// fn new(left_operand: isize) -> Self { - /// assert!(left_operand != 0); - /// Self(left_operand) - /// } - /// } - /// - /// impl Worker for MathWorker { - /// type Input = (isize, u8); - /// type Output = isize; - /// type Error = (); - /// - /// fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { - /// let (operand, operator) = input; - /// let value = match operator % 4 { - /// 0 => operand + self.0, - /// 1 => operand - self.0, - /// 2 => operand * self.0, - /// 3 => operand / self.0, - /// _ => unreachable!(), - /// }; - /// Ok(value) - /// } - /// } - /// - /// # fn main() { - /// let hive = Builder::new() - /// .num_threads(8) - /// .thread_stack_size(4_000_000) - /// .build_with(MathWorker(5isize)); - /// - /// let sum: isize = hive - /// .map((0..100).zip((0..4).cycle())) - /// .into_outputs() - /// .sum(); - /// assert_eq!(sum, 8920); - /// # } - /// ``` - pub fn build_with(self, worker: W) -> Hive> - where - W: Worker + Send + Sync + Clone, - { - Hive::new(self.0, CloneQueen::new(worker)) - } - - /// Consumes this `Builder` and returns a new [`Hive`] with [`Worker`]s created using - /// [`W::default()`](std::default::Default). 
- /// - /// Returns a [`SpawnError`](crate::hive::SpawnError) if there was an error spawning the - /// worker threads. - /// - /// # Examples - /// - /// ``` - /// # use beekeeper::hive::{Builder, OutcomeIteratorExt}; - /// # use beekeeper::bee::{Context, Worker, WorkerResult}; - /// # use std::num::NonZeroIsize; - /// - /// #[derive(Debug, Default)] - /// struct MathWorker(isize); // value is always `0` - /// - /// impl Worker for MathWorker { - /// type Input = (NonZeroIsize, u8); - /// type Output = isize; - /// type Error = (); - /// - /// fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { - /// let (operand, operator) = input; - /// let result = match operator % 4 { - /// 0 => self.0 + operand.get(), - /// 1 => self.0 - operand.get(), - /// 2 => self.0 * operand.get(), - /// 3 => self.0 / operand.get(), - /// _ => unreachable!(), - /// }; - /// Ok(result) - /// } - /// } - /// - /// # fn main() { - /// let hive = Builder::new() - /// .num_threads(8) - /// .thread_stack_size(4_000_000) - /// .build_with_default::(); - /// - /// let sum: isize = hive - /// .map((1..=100).map(|i| NonZeroIsize::new(i).unwrap()).zip((0..4).cycle())) - /// .into_outputs() - /// .sum(); - /// assert_eq!(sum, -25); - /// # } - /// ``` - pub fn build_with_default(self) -> Hive> - where - W: Worker + Send + Sync + Default, - { - Hive::new(self.0, DefaultQueen::default()) - } -} - -impl Default for Builder { - /// Creates a new `Builder` with default configuration options: - /// * `num_threads = config::DEFAULT_NUM_THREADS` - /// - /// The following default configuration options are used when the `retry` feature is enabled: - /// * `max_retries = config::retry::DEFAULT_MAX_RETRIES` - /// * `retry_factor = config::retry::DEFAULT_RETRY_FACTOR_SECS` - fn default() -> Self { - Builder(Config::with_defaults()) - } -} - -impl From for Builder { - fn from(value: Config) -> Self { - Self(value) - } -} - -#[cfg(feature = "affinity")] -mod affinity { - use super::Builder; - use crate::hive::cores::Cores; - - impl Builder { - /// Sets set list of CPU core indices to which threads in the `Hive` should be pinned. - /// - /// Core indices are integers in the range `0..N`, where `N` is the number of available CPU - /// cores as reported by [`num_cpus::get()`]. The mapping between core indices and core IDs - /// is platform-specific. All CPU cores on a given system should be equivalent, and thus it - /// does not matter which cores are pinned so long as a core is not pinned to multiple - /// threads. - /// - /// Excess core indices (i.e., if `affinity.len() > num_threads`) are ignored. If - /// `affinity.len() < num_threads` then the excess threads will not be pinned. - /// - /// # Examples - /// - /// Each thread spawned by this hive will be pinned to a core: - /// - /// ``` - /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; - /// use beekeeper::hive::{Builder, Hive}; - /// - /// # fn main() { - /// let hive = Builder::new() - /// .num_threads(4) - /// .core_affinity(0..4) - /// .build_with_default::>(); - /// - /// for _ in 0..100 { - /// hive.apply_store(Thunk::of(|| { - /// println!("This thread is pinned!"); - /// })); - /// } - /// # hive.join(); - /// # } - /// ``` - pub fn core_affinity>(mut self, affinity: C) -> Self { - let _ = self.0.affinity.set(Some(affinity.into())); - self - } - - /// Specifies that worker threads should be pinned to all available CPU cores. If - /// `num_threads` is greater than the available number of CPU cores, then some threads - /// might not be pinned. 
- pub fn with_default_core_affinity(mut self) -> Self { - let _ = self.0.affinity.set(Some(Cores::all())); - self - } - } - - #[cfg(test)] - mod tests { - use crate::hive::cores::Cores; - use crate::hive::Builder; - - #[test] - fn test_with_affinity() { - let mut builder = Builder::new(); - builder = builder.with_default_core_affinity(); - assert_eq!(builder.0.affinity.get(), Some(Cores::all())); - } - } -} - -#[cfg(feature = "batching")] -mod batching { - use super::Builder; - - impl Builder { - /// Sets the worker thread batch size. If `batch_size` is `0`, batching is disabled, but - /// note that the performance may be worse than with the `batching` feature disabled. - pub fn batch_size(mut self, batch_size: usize) -> Self { - if batch_size == 0 { - self.0.batch_size.set(None); - } else { - self.0.batch_size.set(Some(batch_size)); - } - self - } - - /// Sets the worker thread batch size to the global default value. - pub fn with_default_batch_size(mut self) -> Self { - let _ = self - .0 - .batch_size - .set(crate::hive::config::DEFAULTS.lock().batch_size.get()); - self - } - } -} - -#[cfg(feature = "retry")] -mod retry { - use super::Builder; - use std::time::Duration; - - impl Builder { - /// Sets the maximum number of times to retry a - /// [`ApplyError::Retryable`](crate::bee::ApplyError::Retryable) error. A worker - /// thread will retry a task until it either returns - /// [`ApplyError::Fatal`](crate::bee::ApplyError::Fatal) or the maximum number of retries is - /// reached. Each time a task is retried, the worker thread will first sleep for - /// `retry_factor * (2 ** (attempt - 1))` before attempting the task again. If not - /// specified, tasks are retried a default number of times. If set to `0`, tasks will be - /// retried immediately without delay. - /// - /// # Examples - /// - /// ``` - /// use beekeeper::bee::{ApplyError, Context}; - /// use beekeeper::bee::stock::RetryCaller; - /// use beekeeper::hive::{Builder, Hive}; - /// use std::time; - /// - /// fn sometimes_fail( - /// i: usize, - /// _: &Context - /// ) -> Result> { - /// match i % 3 { - /// 0 => Ok("Success".into()), - /// 1 => Err(ApplyError::Retryable { input: i, error: "Retryable".into() }), - /// 2 => Err(ApplyError::Fatal { input: Some(i), error: "Fatal".into() }), - /// _ => unreachable!(), - /// } - /// } - /// - /// # fn main() { - /// let hive = Builder::default() - /// .max_retries(3) - /// .build_with(RetryCaller::of(sometimes_fail)); - /// - /// for i in 0..10 { - /// hive.apply_store(i); - /// } - /// # hive.join(); - /// # } - /// ``` - pub fn max_retries(mut self, limit: u32) -> Self { - let _ = if limit == 0 { - self.0.max_retries.set(None) - } else { - self.0.max_retries.set(Some(limit)) - }; - self - } - - /// Sets the exponential back-off factor for retrying tasks. Each time a task is retried, - /// the thread will first sleep for `retry_factor * (2 ** (attempt - 1))`. If not - /// specififed, a default retry factor is used. Set to - /// [`Duration::ZERO`](std::time::Duration::ZERO) to disableexponential backoff. 
- /// - /// # Examples - /// - /// ``` - /// use beekeeper::bee::{ApplyError, Context}; - /// use beekeeper::bee::stock::RetryCaller; - /// use beekeeper::hive::{Builder, Hive}; - /// use std::time; - /// - /// fn echo_time(i: usize, ctx: &Context) -> Result> { - /// let attempt = ctx.attempt(); - /// if attempt == 3 { - /// Ok("Success".into()) - /// } else { - /// // the delay between each message should be exponential - /// println!("Task {} attempt {}: {:?}", i, attempt, time::SystemTime::now()); - /// Err(ApplyError::Retryable { input: i, error: "Retryable".into() }) - /// } - /// } - /// - /// # fn main() { - /// let hive = Builder::default() - /// .max_retries(3) - /// .retry_factor(time::Duration::from_secs(1)) - /// .build_with(RetryCaller::of(echo_time)); - /// - /// for i in 0..10 { - /// hive.apply_store(i); - /// } - /// # hive.join(); - /// # } - /// ``` - pub fn retry_factor(mut self, duration: Duration) -> Self { - let _ = if duration == Duration::ZERO { - self.0.retry_factor.set(None) - } else { - self.0.set_retry_factor_from(duration) - }; - self - } - - /// Sets retry parameters to their default values. - pub fn with_default_retries(mut self) -> Self { - let defaults = crate::hive::config::DEFAULTS.lock(); - let _ = self.0.max_retries.set(defaults.max_retries.get()); - let _ = self.0.retry_factor.set(defaults.retry_factor.get()); - self - } - - /// Disables retrying tasks. - pub fn with_no_retries(self) -> Self { - self.max_retries(0).retry_factor(Duration::ZERO) - } - } -} diff --git a/src/hive/builder/bee.rs b/src/hive/builder/bee.rs new file mode 100644 index 0000000..a393b84 --- /dev/null +++ b/src/hive/builder/bee.rs @@ -0,0 +1,295 @@ +use super::{BuilderConfig, FullBuilder, Token}; +use crate::bee::{CloneQueen, DefaultQueen, Queen, QueenCell, QueenMut, Worker}; +use crate::hive::{ChannelTaskQueues, Config, TaskQueues, WorkstealingTaskQueues}; +use derive_more::Debug; +use std::any; + +/// A Builder for creating `Hive` instances for specific [`Worker`] and [`TaskQueues`] types. +#[derive(Clone, Default, Debug)] +pub struct BeeBuilder { + config: Config, + #[debug("{}",any::type_name::())] + queen: Q, +} + +impl BeeBuilder { + /// Creates a new `BeeBuilder` with the given queen and no options configured. + pub fn empty(queen: Q) -> Self { + Self { + config: Config::empty(), + queen, + } + } + + /// Creates a new `BeeBuilder` with the given `queen` and options configured with global + /// preset values. + pub fn preset(queen: Q) -> Self { + Self { + config: Config::default(), + queen, + } + } + + /// Creates a new `BeeBuilder` from an existing `config` and a `queen`. + pub(super) fn from_config_and_queen(config: Config, queen: Q) -> Self { + Self { config, queen } + } + + /// Creates a new `FullBuilder` with the current configuration and queen and specified + /// `TaskQueues` type. + pub fn with_queues>(self) -> FullBuilder { + FullBuilder::from_config_and_queen(self.config, self.queen) + } + + /// Creates a new `FullBuilder` with the current configuration and queen and channel-based + /// task queues. + pub fn with_channel_queues(self) -> FullBuilder> { + FullBuilder::from_config_and_queen(self.config, self.queen) + } + + /// Creates a new `FullBuilder` with the current configuration and queen and workstealing + /// task queues. 
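The `with_channel_queues`/`with_workstealing_queues` pair is where a bee-typed builder commits to a `TaskQueues` implementation. A minimal sketch of the queen-first ordering (assuming `BeeBuilder` and the `Builder` trait are re-exported from `beekeeper::hive`, as the module docs indicate):

```rust
use beekeeper::bee::stock::EchoWorker;
use beekeeper::hive::{BeeBuilder, Builder};

fn main() {
    // Fix the bee type first, then pick the queue implementation.
    let hive = BeeBuilder::empty_with_worker(EchoWorker::<u8>::default())
        .num_threads(2)
        .with_workstealing_queues()
        .build();
    hive.apply_store(1u8);
    hive.join();
}
```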
+    pub fn with_workstealing_queues(self) -> FullBuilder<Q, WorkstealingTaskQueues<Q::Kind>> {
+        FullBuilder::from_config_and_queen(self.config, self.queen)
+    }
+}
+
+impl<Q: Queen + Default> BeeBuilder<Q> {
+    /// Creates a new `BeeBuilder` with a queen created with
+    /// [`Q::default()`](std::default::Default) and no options configured.
+    pub fn empty_with_queen_default() -> Self {
+        Self {
+            config: Config::empty(),
+            queen: Q::default(),
+        }
+    }
+
+    /// Creates a new `BeeBuilder` with a queen created with
+    /// [`Q::default()`](std::default::Default) and options configured with global defaults.
+    pub fn preset_with_queen_default() -> Self {
+        Self {
+            config: Config::default(),
+            queen: Q::default(),
+        }
+    }
+}
+
+impl<Q: QueenMut + Default> BeeBuilder<QueenCell<Q>> {
+    /// Creates a new `BeeBuilder` with a queen created with
+    /// [`Q::default()`](std::default::Default) and no options configured.
+    pub fn empty_with_queen_mut_default() -> Self {
+        Self {
+            config: Config::empty(),
+            queen: QueenCell::new(Q::default()),
+        }
+    }
+
+    /// Creates a new `BeeBuilder` with a queen created with
+    /// [`Q::default()`](std::default::Default) and options configured with global defaults.
+    pub fn preset_with_queen_mut_default() -> Self {
+        Self {
+            config: Config::default(),
+            queen: QueenCell::new(Q::default()),
+        }
+    }
+}
+
+impl<W: Worker + Send + Sync + Clone> BeeBuilder<CloneQueen<W>> {
+    /// Creates a new `BeeBuilder` with a `CloneQueen` created with the given `worker` and no
+    /// options configured.
+    pub fn empty_with_worker(worker: W) -> Self {
+        Self {
+            config: Config::empty(),
+            queen: CloneQueen::new(worker),
+        }
+    }
+
+    /// Creates a new `BeeBuilder` with a `CloneQueen` created with the given `worker` and
+    /// options configured with global defaults.
+    pub fn preset_with_worker(worker: W) -> Self {
+        Self {
+            config: Config::default(),
+            queen: CloneQueen::new(worker),
+        }
+    }
+}
+
+impl<W: Worker + Send + Sync + Default> BeeBuilder<DefaultQueen<W>> {
+    /// Creates a new `BeeBuilder` with a `DefaultQueen` created with the given `Worker` type and
+    /// no options configured.
+    pub fn empty_with_worker_default() -> Self {
+        Self {
+            config: Config::empty(),
+            queen: DefaultQueen::default(),
+        }
+    }
+
+    /// Creates a new `BeeBuilder` with a `DefaultQueen` created with the given `Worker` type and
+    /// options configured with global defaults.
+ pub fn preset_with_worker_default() -> Self { + Self { + config: Config::default(), + queen: DefaultQueen::default(), + } + } +} + +impl BuilderConfig for BeeBuilder { + fn config_ref(&mut self, _: Token) -> &mut Config { + &mut self.config + } +} + +impl From for BeeBuilder { + fn from(value: Config) -> Self { + Self::from_config_and_queen(value, Q::default()) + } +} + +impl From for BeeBuilder { + fn from(value: Q) -> Self { + Self::from_config_and_queen(Config::default(), value) + } +} + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::*; + use crate::bee::stock::EchoWorker; + use crate::bee::{CloneQueen, DefaultQueen, Queen, QueenCell, QueenMut}; + use rstest::rstest; + + #[derive(Clone, Default)] + struct TestQueen; + + impl Queen for TestQueen { + type Kind = EchoWorker; + + fn create(&self) -> Self::Kind { + EchoWorker::default() + } + } + + impl QueenMut for TestQueen { + type Kind = EchoWorker; + + fn create(&mut self) -> Self::Kind { + EchoWorker::default() + } + } + + #[rstest] + fn test_queen( + #[values( + BeeBuilder::::empty, + BeeBuilder::::preset + )] + factory: F, + #[values( + BeeBuilder::::with_channel_queues, + BeeBuilder::::with_workstealing_queues, + )] + with_fn: W, + ) where + F: Fn(TestQueen) -> BeeBuilder, + T: TaskQueues>, + W: Fn(BeeBuilder) -> FullBuilder, + { + let bee_builder = factory(TestQueen); + let full_builder = with_fn(bee_builder); + let _hive = full_builder.build(); + } + + #[rstest] + fn test_queen_default( + #[values( + BeeBuilder::::empty_with_queen_default, + BeeBuilder::::preset_with_queen_default + )] + factory: F, + #[values( + BeeBuilder::::with_channel_queues, + BeeBuilder::::with_workstealing_queues, + )] + with_fn: W, + ) where + F: Fn() -> BeeBuilder, + T: TaskQueues>, + W: Fn(BeeBuilder) -> FullBuilder, + { + let bee_builder = factory(); + let full_builder = with_fn(bee_builder); + let _hive = full_builder.build(); + } + + #[rstest] + fn test_queen_mut_default( + #[values( + BeeBuilder::>::empty_with_queen_mut_default, + BeeBuilder::>::preset_with_queen_mut_default + )] + factory: F, + #[values( + BeeBuilder::>::with_channel_queues, + BeeBuilder::>::with_workstealing_queues, + )] + with_fn: W, + ) where + F: Fn() -> BeeBuilder>, + T: TaskQueues>, + W: Fn(BeeBuilder>) -> FullBuilder, T>, + { + let bee_builder = factory(); + let full_builder = with_fn(bee_builder); + let _hive = full_builder.build(); + } + + #[rstest] + fn test_worker( + #[values( + BeeBuilder::>>::empty_with_worker, + BeeBuilder::>>::preset_with_worker + )] + factory: F, + #[values( + BeeBuilder::>>::with_channel_queues, + BeeBuilder::>>::with_workstealing_queues, + )] + with_fn: W, + ) where + F: Fn(EchoWorker) -> BeeBuilder>>, + T: TaskQueues>, + W: Fn( + BeeBuilder>>, + ) -> FullBuilder>, T>, + { + let bee_builder = factory(EchoWorker::default()); + let full_builder = with_fn(bee_builder); + let _hive = full_builder.build(); + } + + #[rstest] + fn test_worker_default( + #[values( + BeeBuilder::>>::empty_with_worker_default, + BeeBuilder::>>::preset_with_worker_default + )] + factory: F, + #[values( + BeeBuilder::>>::with_channel_queues, + BeeBuilder::>>::with_workstealing_queues, + )] + with_fn: W, + ) where + F: Fn() -> BeeBuilder>>, + T: TaskQueues>, + W: Fn( + BeeBuilder>>, + ) -> FullBuilder>, T>, + { + let bee_builder = factory(); + let full_builder = with_fn(bee_builder); + let _hive = full_builder.build(); + } +} diff --git a/src/hive/builder/full.rs b/src/hive/builder/full.rs new file mode 100644 index 0000000..4865420 --- 
/dev/null +++ b/src/hive/builder/full.rs @@ -0,0 +1,118 @@ +use super::{BuilderConfig, Token}; +use crate::bee::Queen; +use crate::hive::{Config, Hive, TaskQueues}; +use derive_more::Debug; +use std::any; +use std::marker::PhantomData; + +/// A Builder for creating `Hive` instances for specific [`Queen`] and [`TaskQueues`] types. +#[derive(Clone, Default, Debug)] +pub struct FullBuilder> { + config: Config, + #[debug("{}", any::type_name::())] + queen: Q, + #[debug("{}", any::type_name::())] + _queues: PhantomData, +} + +impl> FullBuilder { + /// Creates a new `FullBuilder` with the given queen and no options configured. + pub fn empty(queen: Q) -> Self { + Self { + config: Config::empty(), + queen, + _queues: PhantomData, + } + } + + /// Creates a new `FullBuilder` with the given `queen` and options configured with global + /// defaults. + pub fn preset(queen: Q) -> Self { + Self { + config: Config::default(), + queen, + _queues: PhantomData, + } + } + + /// Creates a new `FullBuilder` from an existing `config` and a `queen`. + pub(super) fn from_config_and_queen(config: Config, queen: Q) -> Self { + Self { + config, + queen, + _queues: PhantomData, + } + } + + /// Consumes this `Builder` and returns a new [`Hive`]. + pub fn build(self) -> Hive { + Hive::new(self.config, self.queen) + } +} + +impl> From for FullBuilder { + fn from(value: Config) -> Self { + Self::from_config_and_queen(value, Q::default()) + } +} + +impl> From for FullBuilder { + fn from(value: Q) -> Self { + Self::from_config_and_queen(Config::default(), value) + } +} + +impl> BuilderConfig for FullBuilder { + fn config_ref(&mut self, _: Token) -> &mut Config { + &mut self.config + } +} + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::*; + use crate::bee::Queen; + use crate::bee::stock::EchoWorker; + use crate::hive::{ChannelTaskQueues, WorkstealingTaskQueues}; + use rstest::rstest; + + #[derive(Clone, Default)] + struct TestQueen; + + impl Queen for TestQueen { + type Kind = EchoWorker; + + fn create(&self) -> Self::Kind { + EchoWorker::default() + } + } + + #[rstest] + fn test_channel( + #[values( + FullBuilder::>>::empty, + FullBuilder::>>::preset + )] + factory: F, + ) where + F: Fn(TestQueen) -> FullBuilder>>, + { + let builder = factory(TestQueen); + let _hive = builder.build(); + } + + #[rstest] + fn test_workstealing( + #[values( + FullBuilder::>>::empty, + FullBuilder::>>::preset + )] + factory: F, + ) where + F: Fn(TestQueen) -> FullBuilder>>, + { + let builder = factory(TestQueen); + let _hive = builder.build(); + } +} diff --git a/src/hive/builder/mod.rs b/src/hive/builder/mod.rs new file mode 100644 index 0000000..62af81d --- /dev/null +++ b/src/hive/builder/mod.rs @@ -0,0 +1,103 @@ +//! There are a few different builder types. +//! +//! * Open: has no type parameters; can only set config parameters. Has methods to create +//! typed builders. +//! * Bee-typed: has type parameters for the `Worker` and `Queen` types. +//! * Queue-typed: builder instances that are specific to the `TaskQueues` type. +//! * Fully-typed: builder that has type parameters for the `Worker`, `Queen`, and `TaskQueues` +//! types. This is the only builder with a `build` method to create a `Hive`. +//! +//! All builders implement the `Builder` trait, which provides methods to set configuration +//! parameters. The configuration options available: +//! * [`Builder::num_threads`]: number of worker threads that will be spawned by the built `Hive`. +//! 
* [`Builder::with_default_num_threads`] will set `num_threads` to the global default value.
+//!   * [`Builder::with_thread_per_core`] will set `num_threads` to the number of available CPU
+//!     cores.
+//! * [`Builder::thread_name`]: thread name for each of the threads spawned by the built `Hive`. By
+//!   default, threads are unnamed.
+//! * [`Builder::thread_stack_size`]: stack size (in bytes) for each of the threads spawned by the
+//!   built `Hive`. See the
+//!   [`std::thread`](https://doc.rust-lang.org/stable/std/thread/index.html#stack-size)
+//!   documentation for details on the default stack size.
+//!
+//! The following configuration options are available when the `affinity` feature is enabled:
+//! * [`Builder::core_affinity`]: List of CPU core indices to which the threads should be pinned.
+//!   * [`Builder::with_default_core_affinity`] will set the list to all CPU core indices, though
+//!     only the first `num_threads` indices will be used.
+//!
+//! The following configuration options are available when the `local-batch` feature is enabled:
+//! * [`Builder::batch_limit`]: Maximum number of tasks that can be queued by a worker.
+//! * [`Builder::weight_limit`]: Maximum "weight" of tasks that can be queued by a worker.
+//! * [`Builder::with_default_batch_limit`] and [`Builder::with_default_weight_limit`] set the
+//!   local-batch options to the global defaults, while [`Builder::with_no_local_batching`]
+//!   disables local-batching.
+//!
+//! The following configuration options are available when the `retry` feature is enabled:
+//! * [`Builder::max_retries`]: maximum number of times a `Worker` will retry an
+//!   [`ApplyError::Retryable`](crate::bee::ApplyError#Retryable) before giving up.
+//! * [`Builder::retry_factor`]: [`Duration`](std::time::Duration) factor for exponential backoff
+//!   when retrying an `ApplyError::Retryable` error.
+//! * [`Builder::with_default_max_retries`] and [`Builder::with_default_retry_factor`] set the
+//!   retry options to the global defaults, while [`Builder::with_no_retries`] disables retrying.
+mod bee;
+mod full;
+mod open;
+mod queue;
+
+pub use bee::BeeBuilder;
+pub use full::FullBuilder;
+pub use open::OpenBuilder;
+pub use queue::TaskQueuesBuilder;
+pub use queue::channel::ChannelBuilder;
+pub use queue::workstealing::WorkstealingBuilder;
+
+use crate::hive::inner::{Builder, BuilderConfig, Token};
+
+/// Creates a new `OpenBuilder`. If `with_defaults` is `true`, the builder will be pre-configured
+/// with the global defaults.
+pub fn open(with_defaults: bool) -> OpenBuilder {
+    if with_defaults {
+        OpenBuilder::default()
+    } else {
+        OpenBuilder::empty()
+    }
+}
+
+/// Creates a new `ChannelBuilder`. If `with_defaults` is `true`, the builder will be
+/// pre-configured with the global defaults.
+pub fn channel(with_defaults: bool) -> ChannelBuilder {
+    if with_defaults {
+        ChannelBuilder::default()
+    } else {
+        ChannelBuilder::empty()
+    }
+}
+
+/// Creates a new `WorkstealingBuilder`. If `with_defaults` is `true`, the builder will be
+/// pre-configured with the global defaults.
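Putting the free functions and the `Builder` trait methods together, a typical configuration chain looks like the following sketch (it uses the `channel_builder(false)` prelude alias that appears in the doc examples later in this patch, and assumes the prelude brings the builder traits into scope; the `max_retries`/`retry_factor` calls require the `retry` feature):

```rust
use beekeeper::bee::stock::EchoWorker;
use beekeeper::hive::prelude::*;
use std::time::Duration;

fn main() {
    let hive = channel_builder(false) // `false` = start from an empty config
        .num_threads(4)
        .thread_name("bee")
        .max_retries(3)
        .retry_factor(Duration::from_millis(10))
        .with_worker_default::<EchoWorker<u32>>()
        .build();
    hive.apply_store(42u32);
    hive.join();
}
```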
+pub fn workstealing(with_defaults: bool) -> WorkstealingBuilder { + if with_defaults { + WorkstealingBuilder::default() + } else { + WorkstealingBuilder::empty() + } +} + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::*; + use crate::hive::Builder; + use rstest::*; + + #[rstest] + fn test_create B>( + #[values(open, channel, workstealing)] builder_factory: F, + #[values(true, false)] with_defaults: bool, + ) { + let mut builder = builder_factory(with_defaults) + .num_threads(4) + .thread_name("foo") + .thread_stack_size(100); + crate::hive::inner::builder_test_utils::check_builder(&mut builder); + } +} diff --git a/src/hive/builder/open.rs b/src/hive/builder/open.rs new file mode 100644 index 0000000..5600870 --- /dev/null +++ b/src/hive/builder/open.rs @@ -0,0 +1,401 @@ +use super::{BeeBuilder, BuilderConfig, ChannelBuilder, Token, WorkstealingBuilder}; +use crate::bee::{CloneQueen, DefaultQueen, Queen, QueenCell, QueenMut, Worker}; +use crate::hive::Config; + +/// A builder for a [`Hive`](crate::hive::Hive). +/// +/// Calling [`OpenBuilder::empty()`] creates an unconfigured `Builder`, while calling +/// [`OpenBuilder::default()`] creates a `Builder` with fields preset to the global default values. +/// Global defaults can be changed using the +/// [`beekeeper::hive::set_*_default`](crate::hive#functions) functions. +/// +/// See the [module documentation](crate::hive::builder) for details on the available configuration +/// options. +/// +/// This builder needs to be specialized to both the `Queen` and `TaskQueues` types. You can do +/// this in either order. +/// +/// * Calling one of the `with_queen*` methods returns a `BeeBuilder` specialized to a `Queen`. +/// * Calling `with_worker` or `with_worker_default` returns a `BeeBuilder` specialized to a +/// `CloneQueen` or `DefaultQueen` (respectively) for a specific `Worker` type. +/// * Calling `with_channel_queues` or `with_workstealing_queues` returns a `ChannelBuilder` or +/// `WorkstealingBuilder` specialized to a `TaskQueues` type. +/// +/// # Examples +/// +/// Build a [`Hive`](crate::hive::Hive) that uses a maximum of eight threads simultaneously and +/// each thread has a 8 MB stack size: +/// +/// ``` +/// # use beekeeper::hive::{Builder, OpenBuilder}; +/// type MyWorker = beekeeper::bee::stock::ThunkWorker<()>; +/// +/// let hive = OpenBuilder::empty() +/// .num_threads(8) +/// .thread_stack_size(8_000_000) +/// .with_worker_default::() +/// .with_channel_queues() +/// .build(); +/// ``` +#[derive(Clone, Default, Debug)] +pub struct OpenBuilder(Config); + +impl OpenBuilder { + /// Returns a new `Builder` with no options configured. + pub fn empty() -> Self { + Self(Config::empty()) + } + + /// Consumes this `Builder` and returns a new [`BeeBuilder`] using the given [`Queen`] to + /// create [`Worker`]s. 
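The docs above describe two specialization orders, and the example in them shows the bee-first path. The queues-first path looks like this sketch (assuming the `TaskQueuesBuilder` trait is in scope, since it supplies `with_worker_default` on the queue-typed builder):

```rust
use beekeeper::bee::stock::EchoWorker;
use beekeeper::hive::{Builder, OpenBuilder, TaskQueuesBuilder};

fn main() {
    // Commit to channel-based queues first, then to the bee type.
    let hive = OpenBuilder::empty()
        .num_threads(8)
        .with_channel_queues()
        .with_worker_default::<EchoWorker<u32>>()
        .build();
    hive.apply_store(5u32);
    hive.join();
}
```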
+ /// + /// # Examples + /// + /// ``` + /// # use beekeeper::hive::prelude::*; + /// # use beekeeper::bee::{Context, QueenMut, Worker, WorkerResult}; + /// + /// #[derive(Debug)] + /// struct CounterWorker { + /// index: usize, + /// input_count: usize, + /// input_sum: usize, + /// } + /// + /// impl CounterWorker { + /// fn new(index: usize) -> Self { + /// Self { + /// index, + /// input_count: 0, + /// input_sum: 0, + /// } + /// } + /// } + /// + /// impl Worker for CounterWorker { + /// type Input = usize; + /// type Output = String; + /// type Error = (); + /// + /// fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { + /// self.input_count += 1; + /// self.input_sum += input; + /// let s = format!( + /// "CounterWorker {}: Input {}, Count {}, Sum {}", + /// self.index, input, self.input_count, self.input_sum + /// ); + /// Ok(s) + /// } + /// } + /// + /// #[derive(Debug, Default)] + /// struct CounterQueen { + /// num_workers: usize + /// } + /// + /// impl QueenMut for CounterQueen { + /// type Kind = CounterWorker; + /// + /// fn create(&mut self) -> Self::Kind { + /// self.num_workers += 1; + /// CounterWorker::new(self.num_workers) + /// } + /// } + /// + /// # fn main() { + /// let hive = channel_builder(false) + /// .num_threads(8) + /// .thread_stack_size(4_000_000) + /// .with_queen_mut_default::() + /// .build(); + /// + /// for i in 0..100 { + /// hive.apply_store(i); + /// } + /// let husk = hive.try_into_husk(false).unwrap(); + /// assert_eq!(husk.queen().get().num_workers, 8); + /// # } + /// ``` + pub fn with_queen(self, queen: Q) -> BeeBuilder { + BeeBuilder::from_config_and_queen(self.0, queen) + } + + /// Consumes this `Builder` and returns a new [`BeeBuilder`] using a [`Queen`] created with + /// [`Q::default()`](std::default::Default) to create [`Worker`]s. + pub fn with_queen_default(self) -> BeeBuilder { + BeeBuilder::from_config_and_queen(self.0, Q::default()) + } + + /// Consumes this `Builder` and returns a new [`BeeBuilder`] using a [`QueenCell`] wrapping + /// the given [`QueenMut`] to create [`Worker`]s. + pub fn with_queen_mut(self, queen: Q) -> BeeBuilder> { + BeeBuilder::from_config_and_queen(self.0, QueenCell::new(queen)) + } + + /// Consumes this `Builder` and returns a new [`BeeBuilder`] using a [`QueenMut`] created with + /// [`Q::default()`](std::default::Default) to create [`Worker`]s. + pub fn with_queen_mut_default(self) -> BeeBuilder> { + BeeBuilder::from_config_and_queen(self.0, QueenCell::new(Q::default())) + } + + /// Consumes this `Builder` and returns a new [`BeeBuilder`] with [`Worker`]s created by + /// cloning `worker`. 
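`with_queen_mut` covers queens that need mutable state without requiring callers to wrap them by hand. A sketch of the pattern (the `CountingQueen` type is hypothetical, introduced only for illustration):

```rust
use beekeeper::bee::QueenMut;
use beekeeper::bee::stock::EchoWorker;
use beekeeper::hive::{Builder, OpenBuilder};

// A stateful queen passed by value; `with_queen_mut` wraps it in a
// `QueenCell` so the immutable `Queen` trait is still satisfied.
#[derive(Debug, Default)]
struct CountingQueen(usize);

impl QueenMut for CountingQueen {
    type Kind = EchoWorker<u8>;

    fn create(&mut self) -> Self::Kind {
        self.0 += 1; // track how many workers have been created
        EchoWorker::default()
    }
}

fn main() {
    let _hive = OpenBuilder::empty()
        .num_threads(2)
        .with_queen_mut(CountingQueen::default())
        .with_channel_queues()
        .build();
}
```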
+ /// + /// # Examples + /// + /// ``` + /// # use beekeeper::hive::prelude::*; + /// # use beekeeper::bee::{Context, Worker, WorkerResult}; + /// + /// #[derive(Debug, Clone)] + /// struct MathWorker(isize); + /// + /// impl MathWorker { + /// fn new(left_operand: isize) -> Self { + /// assert!(left_operand != 0); + /// Self(left_operand) + /// } + /// } + /// + /// impl Worker for MathWorker { + /// type Input = (isize, u8); + /// type Output = isize; + /// type Error = (); + /// + /// fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { + /// let (operand, operator) = input; + /// let value = match operator % 4 { + /// 0 => operand + self.0, + /// 1 => operand - self.0, + /// 2 => operand * self.0, + /// 3 => operand / self.0, + /// _ => unreachable!(), + /// }; + /// Ok(value) + /// } + /// } + /// + /// # fn main() { + /// let hive = channel_builder(false) + /// .num_threads(8) + /// .thread_stack_size(4_000_000) + /// .with_worker(MathWorker(5isize)) + /// .build(); + /// + /// let sum: isize = hive + /// .map((0..100).zip((0..4).cycle())) + /// .into_outputs() + /// .sum(); + /// assert_eq!(sum, 8920); + /// # } + /// ``` + pub fn with_worker(self, worker: W) -> BeeBuilder> + where + W: Worker + Send + Sync + Clone, + { + BeeBuilder::from_config_and_queen(self.0, CloneQueen::new(worker)) + } + + /// Consumes this `Builder` and returns a new [`BeeBuilder`] with [`Worker`]s created using + /// [`W::default()`](std::default::Default). + /// + /// # Examples + /// + /// ``` + /// # use beekeeper::hive::prelude::*; + /// # use beekeeper::bee::{Context, Worker, WorkerResult}; + /// # use std::num::NonZeroIsize; + /// + /// #[derive(Debug, Default)] + /// struct MathWorker(isize); // value is always `0` + /// + /// impl Worker for MathWorker { + /// type Input = (NonZeroIsize, u8); + /// type Output = isize; + /// type Error = (); + /// + /// fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { + /// let (operand, operator) = input; + /// let result = match operator % 4 { + /// 0 => self.0 + operand.get(), + /// 1 => self.0 - operand.get(), + /// 2 => self.0 * operand.get(), + /// 3 => self.0 / operand.get(), + /// _ => unreachable!(), + /// }; + /// Ok(result) + /// } + /// } + /// + /// # fn main() { + /// let hive = channel_builder(false) + /// .num_threads(8) + /// .thread_stack_size(4_000_000) + /// .with_worker_default::() + /// .build(); + /// + /// let sum: isize = hive + /// .map((1..=100).map(|i| NonZeroIsize::new(i).unwrap()).zip((0..4).cycle())) + /// .into_outputs() + /// .sum(); + /// assert_eq!(sum, -25); + /// # } + /// ``` + pub fn with_worker_default(self) -> BeeBuilder> + where + W: Worker + Send + Sync + Default, + { + BeeBuilder::from_config_and_queen(self.0, DefaultQueen::default()) + } + + /// Consumes this `Builder` and returns a new [`ChannelBuilder`] using the current + /// configuration. + pub fn with_channel_queues(self) -> ChannelBuilder { + ChannelBuilder::from(self.0) + } + + /// Consumes this `Builder` and returns a new [`WorkstealingBuilder`] using the current + /// configuration. 
+ pub fn with_workstealing_queues(self) -> WorkstealingBuilder { + WorkstealingBuilder::from(self.0) + } +} + +impl BuilderConfig for OpenBuilder { + fn config_ref(&mut self, _: Token) -> &mut Config { + &mut self.0 + } +} + +impl From for OpenBuilder { + fn from(value: Config) -> Self { + Self(value) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::bee::stock::EchoWorker; + use crate::bee::{CloneQueen, DefaultQueen, Queen, QueenCell, QueenMut}; + use crate::hive::{FullBuilder, TaskQueues, TaskQueuesBuilder}; + use rstest::rstest; + + #[derive(Clone, Default)] + struct TestQueen; + + impl Queen for TestQueen { + type Kind = EchoWorker; + + fn create(&self) -> Self::Kind { + EchoWorker::default() + } + } + + impl QueenMut for TestQueen { + type Kind = EchoWorker; + + fn create(&mut self) -> Self::Kind { + EchoWorker::default() + } + } + + #[rstest] + fn test_create( + #[values(OpenBuilder::empty, OpenBuilder::default)] factory: F, + #[values( + OpenBuilder::with_channel_queues, + OpenBuilder::with_workstealing_queues + )] + with_fn: W, + ) where + F: Fn() -> OpenBuilder, + B: TaskQueuesBuilder, + W: Fn(OpenBuilder) -> B, + { + let open_builder = factory(); + let queue_builder = with_fn(open_builder); + let _hive = queue_builder + .with_worker(EchoWorker::::default()) + .build(); + } + + #[rstest] + fn test_queen( + #[values(OpenBuilder::empty, OpenBuilder::default)] factory: F, + #[values(BeeBuilder::with_channel_queues, BeeBuilder::with_workstealing_queues)] with_fn: W, + ) where + F: Fn() -> OpenBuilder, + T: TaskQueues>, + W: Fn(BeeBuilder) -> FullBuilder, + { + let open_builder = factory(); + let bee_builder = open_builder.with_queen(TestQueen); + let queue_builder = with_fn(bee_builder); + let _hive = queue_builder.build(); + } + + #[rstest] + fn test_queen_default( + #[values(OpenBuilder::empty, OpenBuilder::default)] factory: F, + #[values(BeeBuilder::with_channel_queues, BeeBuilder::with_workstealing_queues)] with_fn: W, + ) where + F: Fn() -> OpenBuilder, + T: TaskQueues>, + W: Fn(BeeBuilder) -> FullBuilder, + { + let open_builder = factory(); + let bee_builder = open_builder.with_queen_default::(); + let queue_builder = with_fn(bee_builder); + let _hive = queue_builder.build(); + } + + #[rstest] + fn test_queen_mut_default( + #[values(OpenBuilder::empty, OpenBuilder::default)] factory: F, + #[values(BeeBuilder::with_channel_queues, BeeBuilder::with_workstealing_queues)] with_fn: W, + ) where + F: Fn() -> OpenBuilder, + T: TaskQueues>, + W: Fn(BeeBuilder>) -> FullBuilder, T>, + { + let open_builder = factory(); + let bee_builder = open_builder.with_queen_mut_default::(); + let queue_builder = with_fn(bee_builder); + let _hive = queue_builder.build(); + } + + #[rstest] + fn test_worker( + #[values(OpenBuilder::empty, OpenBuilder::default)] factory: F, + #[values(BeeBuilder::with_channel_queues, BeeBuilder::with_workstealing_queues)] with_fn: W, + ) where + F: Fn() -> OpenBuilder, + T: TaskQueues>, + W: Fn( + BeeBuilder>>, + ) -> FullBuilder>, T>, + { + let open_builder = factory(); + let bee_builder = open_builder.with_worker(EchoWorker::default()); + let queue_builder = with_fn(bee_builder); + let _hive = queue_builder.build(); + } + + #[rstest] + fn test_worker_default( + #[values(OpenBuilder::empty, OpenBuilder::default)] factory: F, + #[values(BeeBuilder::with_channel_queues, BeeBuilder::with_workstealing_queues)] with_fn: W, + ) where + F: Fn() -> OpenBuilder, + T: TaskQueues>, + W: Fn( + BeeBuilder>>, + ) -> FullBuilder>, T>, + { + let open_builder = factory(); 
+ let bee_builder = open_builder.with_worker_default::>(); + let queue_builder = with_fn(bee_builder); + let _hive = queue_builder.build(); + } +} diff --git a/src/hive/builder/queue.rs b/src/hive/builder/queue.rs new file mode 100644 index 0000000..b3c8f0a --- /dev/null +++ b/src/hive/builder/queue.rs @@ -0,0 +1,124 @@ +use super::{Builder, FullBuilder}; +use crate::bee::{CloneQueen, DefaultQueen, Queen, QueenCell, QueenMut, Worker}; +use crate::hive::TaskQueues; + +/// Trait implemented by builders specialized to a `TaskQueues` type. +pub trait TaskQueuesBuilder: Builder + Clone + Default + Sized { + /// The type of the `TaskQueues` to use when building the `Hive`. + type TaskQueues: TaskQueues; + + /// Creates a new empty `Builder`. + fn empty() -> Self; + + /// Consumes this `Builder` and returns a new [`FullBuilder`] using the given [`Queen`] to + /// create [`Worker`]s. + fn with_queen(self, queen: Q) -> FullBuilder>; + + /// Consumes this `Builder` and returns a new [`FullBuilder`] using a [`Queen`] created with + /// [`Q::default()`](std::default::Default) to create [`Worker`]s. + fn with_queen_default(self) -> FullBuilder> + where + Q: Queen + Default, + { + self.with_queen(Q::default()) + } + + /// Consumes this `Builder` and returns a new [`FullBuilder`] using a [`QueenMut`] created with + /// [`Q::default()`](std::default::Default) to create [`Worker`]s. + fn with_queen_mut_default(self) -> FullBuilder, Self::TaskQueues> + where + Q: QueenMut + Default, + { + self.with_queen(QueenCell::new(Q::default())) + } + + /// Consumes this `Builder` and returns a new [`FullBuilder`] with [`Worker`]s created by + /// cloning `worker`. + fn with_worker(self, worker: W) -> FullBuilder, Self::TaskQueues> + where + W: Worker + Send + Sync + Clone, + { + self.with_queen(CloneQueen::new(worker)) + } + + /// Consumes this `Builder` and returns a new [`FullBuilder`] with [`Worker`]s created using + /// [`W::default()`](std::default::Default). + fn with_worker_default(self) -> FullBuilder, Self::TaskQueues> + where + W: Worker + Send + Sync + Default, + { + self.with_queen(DefaultQueen::default()) + } +} + +pub mod channel { + use super::*; + use crate::hive::builder::{BuilderConfig, Token}; + use crate::hive::{ChannelTaskQueues, Config}; + + /// `TaskQueuesBuilder` implementation for channel-based task queues. + #[derive(Clone, Default, Debug)] + pub struct ChannelBuilder(Config); + + impl BuilderConfig for ChannelBuilder { + fn config_ref(&mut self, _: Token) -> &mut Config { + &mut self.0 + } + } + + impl TaskQueuesBuilder for ChannelBuilder { + type TaskQueues = ChannelTaskQueues; + + fn empty() -> Self { + Self(Config::empty()) + } + + /// Consumes this `Builder` and returns a new [`FullBuilder`] using the given [`Queen`] to + /// create [`Worker`]s. + fn with_queen(self, queen: Q) -> FullBuilder> { + FullBuilder::from_config_and_queen(self.0, queen) + } + } + + impl From for ChannelBuilder { + fn from(value: Config) -> Self { + Self(value) + } + } +} + +pub mod workstealing { + use super::*; + use crate::hive::builder::{BuilderConfig, Token}; + use crate::hive::{Config, WorkstealingTaskQueues}; + + /// `TaskQueuesBuilder` implementation for workstealing-based task queues. 
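Both `TaskQueuesBuilder` implementations expose the same surface, so switching queue strategies is a one-line change. A sketch (the `workstealing_builder` prelude alias is an assumption, by symmetry with the `channel_builder` alias used in the doc examples above):

```rust
use beekeeper::bee::stock::EchoWorker;
use beekeeper::hive::prelude::*;

fn main() {
    // `true` starts from the global default configuration; assumes the
    // prelude exposes `workstealing_builder` alongside `channel_builder`.
    let hive = workstealing_builder(true)
        .with_worker_default::<EchoWorker<u8>>()
        .build();
    hive.apply_store(3u8);
    hive.join();
}
```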
+ #[derive(Clone, Default, Debug)] + pub struct WorkstealingBuilder(Config); + + impl BuilderConfig for WorkstealingBuilder { + fn config_ref(&mut self, _: Token) -> &mut Config { + &mut self.0 + } + } + + impl TaskQueuesBuilder for WorkstealingBuilder { + type TaskQueues = WorkstealingTaskQueues; + + fn empty() -> Self { + Self(Config::empty()) + } + + /// Consumes this `Builder` and returns a new [`FullBuilder`] using the given [`Queen`] to + /// create [`Worker`]s. + fn with_queen(self, queen: Q) -> FullBuilder> { + FullBuilder::from_config_and_queen(self.0, queen) + } + } + + impl From for WorkstealingBuilder { + fn from(value: Config) -> Self { + Self(value) + } + } +} diff --git a/src/hive/context.rs b/src/hive/context.rs new file mode 100644 index 0000000..d8854ca --- /dev/null +++ b/src/hive/context.rs @@ -0,0 +1,70 @@ +//! Implementation of `crate::bee::LocalContext` for a `Hive`. +use crate::bee::{LocalContext, Queen, TaskId, Worker}; +use crate::hive::{OutcomeSender, Shared, TaskQueues, WorkerQueues}; +use std::fmt; +use std::sync::Arc; + +pub struct HiveLocalContext<'a, W, Q, T> +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ + worker_queues: &'a T::WorkerQueues, + shared: &'a Arc>, + outcome_tx: Option<&'a OutcomeSender>, +} + +impl<'a, W, Q, T> HiveLocalContext<'a, W, Q, T> +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ + /// Creates a new `HiveLocalContext` instance. + pub fn new( + worker_queues: &'a T::WorkerQueues, + shared: &'a Arc>, + outcome_tx: Option<&'a OutcomeSender>, + ) -> Self { + Self { + worker_queues, + shared, + outcome_tx, + } + } +} + +impl LocalContext for HiveLocalContext<'_, W, Q, T> +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ + fn should_cancel_tasks(&self) -> bool { + self.shared.is_suspended() + } + + fn submit_task(&self, input: W::Input) -> TaskId { + let task = self.shared.prepare_task(input, self.outcome_tx); + let task_id = task.id(); + self.worker_queues.push(task); + task_id + } + + #[cfg(test)] + fn thread_index(&self) -> usize { + self.worker_queues.thread_index() + } +} + +impl fmt::Debug for HiveLocalContext<'_, W, Q, T> +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("HiveLocalContext").finish() + } +} diff --git a/src/hive/cores.rs b/src/hive/cores.rs index 77b7f17..249fe03 100644 --- a/src/hive/cores.rs +++ b/src/hive/cores.rs @@ -1,5 +1,6 @@ //! Utilities for pinning worker threads to CPU cores in a `Hive`. -use parking_lot::Mutex; +use core_affinity::{self, CoreId}; +use parking_lot::{Mutex, MutexGuard}; use std::collections::HashSet; use std::ops::{BitOr, BitOrAssign, Sub, SubAssign}; use std::sync::LazyLock; @@ -16,62 +17,55 @@ use std::sync::LazyLock; /// If new cores become available during the life of the program, they are immediately available /// for worker thread scheduling, but they are *not* available for pinning until the /// `refresh()` function is called. -static CORES: LazyLock>> = LazyLock::new(|| { - core_affinity::get_core_ids() - .map(|core_ids| core_ids.into_iter().map(Core::new).collect()) - .or_else(|| Some(Vec::new())) - .map(Mutex::new) - .unwrap() -}); - -/// Updates `CORES` with the currently available CPU core IDs. The correspondence between the -/// index in the sequence and the core ID is maintained for any core IDs already in the sequence. -/// If a previously available core has become unavailable, its `available` flag is set to `false`. -/// Any new cores are appended to the end of the sequence. 
Returns the number of new cores added to
-/// the sequence.
-pub fn refresh() -> usize {
-    let mut cur_ids = CORES.lock();
-    let mut new_ids: HashSet<_> = core_affinity::get_core_ids()
-        .map(|core_ids| core_ids.into_iter().collect())
-        .unwrap_or_default();
-    cur_ids.iter_mut().for_each(|core| {
-        if new_ids.contains(&core.id) {
-            core.available = true;
-            new_ids.remove(&core.id);
-        } else {
-            core.available = false;
-        }
-    });
-    let num_new_ids = new_ids.len();
-    cur_ids.extend(new_ids.into_iter().map(Core::new));
-    num_new_ids
-}
+pub static CORES: LazyLock<CoreIds> = LazyLock::new(CoreIds::from_system);
 
-/// Represents a CPU core.
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
-pub struct Core {
-    /// the OS-specific core ID
-    id: core_affinity::CoreId,
-    /// whether this core is currently available for pinning threads
-    available: bool,
-}
+/// Global list of CPU core IDs.
+///
+/// This is meant to be created at most once, when `CORES` is initialized.
+pub struct CoreIds(Mutex<Vec<Core>>);
 
-impl Core {
-    /// Creates a new `Core` with `available` set to `true`.
-    fn new(core_id: core_affinity::CoreId) -> Self {
-        Self {
-            id: core_id,
-            available: true,
-        }
+impl CoreIds {
+    fn from_system() -> Self {
+        Self::new(
+            core_affinity::get_core_ids()
+                .map(|core_ids| core_ids.into_iter().map(Core::from).collect())
+                .unwrap_or_default(),
+        )
     }
 
-    /// Attempts to pin the current thread to this CPU core. Returns `true` if the thread was
-    /// successfully pinned.
-    ///
-    /// If the `available` flag is `false`, this immediately returns `false` and does not attempt
-    /// to pin the thread.
-    pub fn try_pin_current(&self) -> bool {
-        self.available && core_affinity::set_for_current(self.id)
+    fn new(core_ids: Vec<Core>) -> Self {
+        Self(Mutex::new(core_ids))
+    }
+
+    fn get(&self, index: usize) -> Option<Core> {
+        self.0.lock().get(index).cloned()
+    }
+
+    fn update_from(&self, mut new_ids: HashSet<CoreId>) -> usize {
+        let mut cur_ids = self.0.lock();
+        cur_ids.iter_mut().for_each(|core| {
+            if new_ids.contains(&core.id) {
+                core.available = true;
+                new_ids.remove(&core.id);
+            } else {
+                core.available = false;
+            }
+        });
+        let num_new_ids = new_ids.len();
+        cur_ids.extend(new_ids.into_iter().map(Core::from));
+        num_new_ids
+    }
+
+    /// Updates `CORES` with the currently available CPU core IDs. The correspondence between the
+    /// index in the sequence and the core ID is maintained for any core IDs already in the
+    /// sequence. If a previously available core has become unavailable, its `available` flag is
+    /// set to `false`. Any new cores are appended to the end of the sequence. Returns the number
+    /// of new cores added to the sequence.
+    pub fn refresh(&self) -> usize {
+        let new_ids: HashSet<_> = core_affinity::get_core_ids()
+            .map(|core_ids| core_ids.into_iter().collect())
+            .unwrap_or_default();
+        self.update_from(new_ids)
    }
 }
@@ -80,16 +74,12 @@ impl Core {
 ///
 /// The mapping between CPU indices and core IDs is platform-specific, but the same index is
 /// guaranteed to always refer to the same physical core.
-#[derive(Default, Clone, PartialEq, Eq, Debug)]
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
 pub struct Cores(Vec<usize>);
 
 impl Cores {
-    /// Returns an empty `Cores`.
-    pub fn empty() -> Self {
-        Self(Vec::new())
-    }
-
-    /// Returns a `Cores` set populated with the first `n` CPU indices.
+    /// Returns a `Cores` set populated with the first `n` CPU indices (up to the number of
+    /// available cores).
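+    ///
+    /// A usage sketch (assumes the `affinity` feature is enabled and the machine has at
+    /// least two CPUs):
+    ///
+    /// ```no_run
+    /// use beekeeper::hive::cores::Cores;
+    ///
+    /// // select the first two CPU indices for worker-thread pinning
+    /// let cores = Cores::first(2);
+    /// ```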
    pub fn first(n: usize) -> Self {
        Self(Vec::from_iter(0..n.min(num_cpus::get())))
    }
@@ -122,24 +112,17 @@ impl Cores {
     /// Returns the `Core` associated with the specified index if the index exists and the core
     /// is available, otherwise returns `None`.
     pub fn get(&self, index: usize) -> Option<Core> {
-        let cores = CORES.lock();
         self.0
             .get(index)
-            .and_then(|&index| cores.get(index).cloned())
+            .and_then(|&index| CORES.get(index))
             .filter(|core| core.available)
     }
 
     /// Returns an iterator over `(core_index, Option<Core>)`, where `Some(core)` can be used to
     /// set the core affinity of the current thread. The `core` will be `None` for cores that are
     /// not currently available.
-    pub fn iter(&self) -> impl Iterator<Item = (usize, Option<Core>)> + '_ {
-        let cores = CORES.lock();
-        self.0.iter().cloned().map(move |index| {
-            (
-                index,
-                cores.get(index).filter(|core| core.available).cloned(),
-            )
-        })
+    pub fn iter(&self) -> impl Iterator<Item = (usize, Option<Core>)> {
+        CoreIter::new(self.0.iter().cloned())
     }
 }
@@ -187,13 +170,105 @@ impl> From for Cores {
     }
 }
 
+/// Iterator over core (index, id) tuples. This iterator holds the `MutexGuard` for the shared
+/// global `CoreIds`, so only one thread can iterate at a time.
+pub struct CoreIter<'a, I: Iterator<Item = usize>> {
+    index_iter: I,
+    cores: MutexGuard<'a, Vec<Core>>,
+}
+
+impl<I: Iterator<Item = usize>> CoreIter<'_, I> {
+    fn new(index_iter: I) -> Self {
+        Self {
+            index_iter,
+            cores: CORES.0.lock(),
+        }
+    }
+}
+
+impl<I: Iterator<Item = usize>> Iterator for CoreIter<'_, I> {
+    type Item = (usize, Option<Core>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let index = self.index_iter.next()?;
+        let core = self.cores.get(index).cloned().filter(|core| core.available);
+        Some((index, core))
+    }
+}
+
+/// Represents a CPU core.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Core {
+    /// the OS-specific core ID
+    id: CoreId,
+    /// whether this core is currently available for pinning threads
+    available: bool,
+}
+
+impl Core {
+    fn new(id: CoreId, available: bool) -> Self {
+        Self { id, available }
+    }
+
+    /// Attempts to pin the current thread to this CPU core. Returns `true` if the thread was
+    /// successfully pinned.
+    ///
+    /// If the `available` flag is `false`, this immediately returns `false` and does not attempt
+    /// to pin the thread.
+    pub fn try_pin_current(&self) -> bool {
+        self.available && core_affinity::set_for_current(self.id)
+    }
+}
+
+impl From<CoreId> for Core {
+    /// Creates a new `Core` with `available` set to `true`.
+ fn from(id: CoreId) -> Self { + Self::new(id, true) + } +} + #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::*; + use std::collections::HashSet; + + #[test] + fn test_core_ids() { + let core_ids = CoreIds::new((0..10usize).map(|id| Core::from(CoreId { id })).collect()); + assert_eq!( + (0..10) + .flat_map(|i| core_ids.get(i).map(|id| id.id)) + .collect::>(), + (0..10).map(|id| CoreId { id }).collect::>() + ); + assert!((0..10).all(|i| core_ids.get(i).map(|id| id.available).unwrap_or_default())); + let new_ids: HashSet = vec![10, 11, 1, 3, 5, 7, 9] + .into_iter() + .map(|id| CoreId { id }) + .collect(); + let num_added = core_ids.update_from(new_ids); + assert_eq!(num_added, 2); + let mut new_core_ids = (0..12) + .flat_map(|i| core_ids.get(i).map(|id| id.id)) + .collect::>(); + new_core_ids.sort(); + assert_eq!( + new_core_ids, + (0..12).map(|id| CoreId { id }).collect::>() + ); + assert_eq!( + (0..12) + .flat_map(|i| core_ids.get(i)) + .filter(|id| id.available) + .count(), + 7 + ); + } #[test] fn test_empty() { - assert_eq!(Cores::empty().0.len(), 0); + assert_eq!(Cores::default().0.len(), 0); } #[test] diff --git a/src/hive/delay.rs b/src/hive/delay.rs deleted file mode 100644 index 7e13186..0000000 --- a/src/hive/delay.rs +++ /dev/null @@ -1,165 +0,0 @@ -use std::cell::UnsafeCell; -use std::cmp::Ordering; -use std::collections::BinaryHeap; -use std::time::{Duration, Instant}; - -/// A queue where each item has an associated `Instant` at which it will be available. -/// -/// This is implemented internally as a `UnsafeCell`. -/// -/// SAFETY: This data structure is designed to enable the queue to be modified by a *single thread* -/// using interior mutability. `UnsafeCell` is used for performance - this is safe so long as the -/// queue is only accessed from a single thread at a time. This data structure is *not* thread-safe. -#[derive(Debug)] -pub struct DelayQueue(UnsafeCell>>); - -impl DelayQueue { - /// Pushes an item onto the queue. Returns the `Instant` at which the item will be available, - /// or an error with `item` if there was an error pushing the item. - pub fn push(&self, item: T, delay: Duration) -> Result { - unsafe { - match self.0.get().as_mut() { - Some(queue) => { - let delayed = Delayed::new(item, delay); - let until = delayed.until; - queue.push(delayed); - Ok(until) - } - None => Err(item), - } - } - } - - /// Returns the `Instant` at which the next item will be available. Returns `None` if the queue - /// is empty. - pub fn next_available(&self) -> Option { - unsafe { - self.0 - .get() - .as_ref() - .and_then(|queue| queue.peek().map(|head| head.until)) - } - } - - /// Returns the item at the head of the queue, if one exists and is available (i.e., its delay - /// has been exceeded), and removes it. - pub fn try_pop(&self) -> Option { - unsafe { - if self - .next_available() - .map(|until| until <= Instant::now()) - .unwrap_or(false) - { - self.0 - .get() - .as_mut() - .and_then(|queue| queue.pop()) - .map(|delayed| delayed.value) - } else { - None - } - } - } - - /// Drains all items from the queue and returns them as an iterator. 
- pub fn drain(&mut self) -> impl Iterator + '_ { - self.0.get_mut().drain().map(|delayed| delayed.value) - } -} - -unsafe impl Sync for DelayQueue {} - -impl Default for DelayQueue { - fn default() -> Self { - DelayQueue(UnsafeCell::new(BinaryHeap::new())) - } -} - -#[derive(Debug)] -struct Delayed { - value: T, - until: Instant, -} - -impl Delayed { - pub fn new(value: T, delay: Duration) -> Self { - Delayed { - value, - until: Instant::now() + delay, - } - } -} - -/// Implements ordering for `Delayed`, so it can be used to correctly order elements in the -/// `BinaryHeap` of the `DelayQueue`. -/// -/// Earlier entries have higher priority (should be popped first), so they are Greater that later -/// entries. -impl Ord for Delayed { - fn cmp(&self, other: &Delayed) -> Ordering { - other.until.cmp(&self.until) - } -} - -impl PartialOrd for Delayed { - fn partial_cmp(&self, other: &Delayed) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialEq for Delayed { - fn eq(&self, other: &Delayed) -> bool { - self.cmp(other) == Ordering::Equal - } -} - -impl Eq for Delayed {} - -#[cfg(test)] -mod tests { - use super::DelayQueue; - use std::{thread, time::Duration}; - - impl DelayQueue { - fn len(&self) -> usize { - unsafe { self.0.get().as_ref().unwrap().len() } - } - } - - #[test] - fn test_works() { - let queue = DelayQueue::default(); - - queue.push(1, Duration::from_secs(1)).unwrap(); - queue.push(2, Duration::from_secs(2)).unwrap(); - queue.push(3, Duration::from_secs(3)).unwrap(); - - assert_eq!(queue.len(), 3); - assert_eq!(queue.try_pop(), None); - - thread::sleep(Duration::from_secs(1)); - assert_eq!(queue.try_pop(), Some(1)); - assert_eq!(queue.len(), 2); - - thread::sleep(Duration::from_secs(1)); - assert_eq!(queue.try_pop(), Some(2)); - assert_eq!(queue.len(), 1); - - thread::sleep(Duration::from_secs(1)); - assert_eq!(queue.try_pop(), Some(3)); - assert_eq!(queue.len(), 0); - - assert_eq!(queue.try_pop(), None); - } - - #[test] - fn test_into_vec() { - let mut queue = DelayQueue::default(); - queue.push(1, Duration::from_secs(1)).unwrap(); - queue.push(2, Duration::from_secs(2)).unwrap(); - queue.push(3, Duration::from_secs(3)).unwrap(); - let mut v: Vec<_> = queue.drain().collect(); - v.sort(); - assert_eq!(v, vec![1, 2, 3]); - } -} diff --git a/src/hive/hive.rs b/src/hive/hive.rs index b9cf447..7bc886f 100644 --- a/src/hive/hive.rs +++ b/src/hive/hive.rs @@ -1,39 +1,94 @@ -use super::prelude::*; -use super::{Config, DerefOutcomes, HiveInner, OutcomeSender, Shared, SpawnError, TaskSender}; -use crate::atomic::Atomic; -use crate::bee::{DefaultQueen, Queen, TaskId, Worker}; -use crossbeam_utils::Backoff; +use super::{ + ChannelBuilder, ChannelTaskQueues, Config, DerefOutcomes, HiveLocalContext, Husk, Outcome, + OutcomeBatch, OutcomeIteratorExt, OutcomeSender, Sentinel, Shared, SpawnError, TaskInput, + TaskQueues, TaskQueuesBuilder, +}; +use crate::bee::{Context, DefaultQueen, Queen, TaskId, Worker}; +use derive_more::Debug; +use std::borrow::Borrow; use std::collections::HashMap; -use std::fmt::Debug; use std::ops::{Deref, DerefMut}; -use std::sync::{mpsc, Arc}; -use std::thread::{self, JoinHandle}; +use std::sync::Arc; +use std::thread::JoinHandle; #[derive(thiserror::Error, Debug)] #[error("The hive has been poisoned")] pub struct Poisoned; -impl> Hive { +/// A pool of worker threads that each execute the same function. +/// +/// See the [module documentation](crate::hive) for details. +#[derive(Debug)] +pub struct Hive>(Option>>); + +impl> Hive { + /// Creates a new `Hive`. 
This should only be called from `Builder`. + /// + /// The `Hive` will attempt to spawn the configured number of worker threads + /// (`config.num_threads`) but the actual number of threads available may be lower if there + /// are any errors during spawning. + pub(super) fn new(config: Config, queen: Q) -> Self { + let shared = Arc::new(Shared::new(config.into_sync(), queen)); + shared.init_threads(|thread_index| Self::try_spawn(thread_index, &shared)); + Self(Some(shared)) + } +} + +impl, T: TaskQueues> Hive { /// Spawns a new worker thread with the specified index and with access to the `shared` data. - fn try_spawn( + pub fn try_spawn( thread_index: usize, - shared: Arc>, + shared: &Arc>, ) -> Result, SpawnError> { + let thread_builder = shared.thread_builder(); + let shared = Arc::clone(shared); // spawn a thread that executes the worker loop - shared.thread_builder().spawn(move || { - // perform one-time initialization of the worker thread - Self::init_thread(thread_index, &shared); + thread_builder.spawn(move || { + #[cfg(feature = "affinity")] + if let Some(core) = shared.get_core_affinity(thread_index) { + // try to pin the worker thread to a specific CPU core. + core.try_pin_current(); + } // create a Sentinel that will spawn a new thread on panic until it is cancelled - let sentinel = Sentinel::new(thread_index, Arc::clone(&shared)); - // create a new Worker instance + let sentinel = Sentinel::new(thread_index, Arc::clone(&shared), Self::try_spawn); + // get the thread-local interface to the task queues + let worker_queues = shared.worker_queues(thread_index); + // create a new worker to process tasks let mut worker = shared.create_worker(); - // execute the main loop - // get the next task to process - this decrements the queued counter and increments - // the active counter - while let Ok(task) = shared.next_task(thread_index) { + // execute the main loop: get the next task to process, which decrements the queued + // counter and increments the active counter + while let Some(task) = shared.get_next_task(&worker_queues) { + let (input, task_meta, outcome_tx) = task.into_parts(); + let local_ctx = HiveLocalContext::new(&worker_queues, &shared, outcome_tx.as_ref()); + let apply_ctx = Context::new(task_meta, Some(&local_ctx)); // execute the task until it succeeds or we reach maximum retries - this should // be the only place where a panic can occur - Self::execute(task, thread_index, &mut worker, &shared); + let result = worker.apply(input, &apply_ctx); + let (task_meta, subtask_ids) = apply_ctx.into_parts(); + let outcome = match result { + #[cfg(feature = "retry")] + Err(crate::bee::ApplyError::Retryable { input, error }) + if subtask_ids.is_none() && shared.can_retry(&task_meta) => + { + match shared.try_send_retry( + input, + task_meta, + outcome_tx.as_ref(), + &worker_queues, + ) { + Ok(_) => return, + // currently, the only implementation of retry queue cannot be put into + // a state where `try_send_retry` fails, so this cannot be tested + #[cfg_attr(coverage_nightly, coverage(off))] + Err(task) => { + let (input, task_meta, _) = task.into_parts(); + Outcome::from_fatal(input, task_meta, error) + } + } + } + result => Outcome::from_worker_result(result, task_meta, subtask_ids), + }; + shared.send_or_store_outcome(outcome, outcome_tx); // finish the task - decrements the active counter and notifies other threads shared.finish_task(false); } @@ -43,28 +98,6 @@ impl> Hive { }) } - /// Creates a new `Hive`. This should only be called from `Builder`. 
- /// - /// The `Hive` will attempt to spawn the configured number of worker threads - /// (`config.num_threads`) but the actual number of threads available may be lower if there - /// are any errors during spawning. - pub(super) fn new(config: Config, queen: Q) -> Self { - let (task_tx, task_rx) = mpsc::channel(); - let shared = Arc::new(Shared::new(config.into_sync(), queen, task_rx)); - shared.init_threads(|thread_index| Self::try_spawn(thread_index, Arc::clone(&shared))); - Self(Some(HiveInner { task_tx, shared })) - } - - #[inline] - fn task_tx(&self) -> &TaskSender { - &self.0.as_ref().unwrap().task_tx - } - - #[inline] - fn shared(&self) -> &Arc> { - &self.0.as_ref().unwrap().shared - } - /// Attempts to increase the number of worker threads by `num_threads`. Returns the number of /// new worker threads that were successfully started (which may be fewer than `num_threads`), /// or a `Poisoned` error if the hive has been poisoned. @@ -78,7 +111,7 @@ impl> Hive { return Err(Poisoned); } let num_started = shared.grow_threads(num_threads, |thread_index| { - Self::try_spawn(thread_index, Arc::clone(shared)) + Self::try_spawn(thread_index, shared) }); Ok(num_started) } @@ -91,93 +124,30 @@ impl> Hive { self.grow(num_threads) } - /// Sends one input to the `Hive` for processing and returns its ID. The `Outcome` - /// of the task is sent to the `outcome_tx` channel if provided, otherwise it is retained in - /// the `Hive` for later retrieval. - /// - /// This method is called by all the `*apply*` methods. - fn send_one(&self, input: W::Input, outcome_tx: Option>) -> TaskId { - #[cfg(debug_assertions)] - if self.max_workers() == 0 { - dbg!("WARNING: no worker threads are active for hive"); - } - let shared = self.shared(); - let task = shared.prepare_task(input, outcome_tx); - let task_id = task.id(); - // try to send the task to the hive; if the hive is poisoned or if sending fails, convert - // the task into an `Unprocessed` outcome and try to send it to the outcome channel; if - // that fails, store the outcome in the hive - if let Some(abandoned_task) = if self.is_poisoned() { - Some(task) - } else { - self.task_tx().send(task).err().map(|err| err.0) - } { - shared.abandon_task(abandoned_task); - } - task_id - } - /// Sends one `input` to the `Hive` for procesing and returns the result, blocking until the /// result is available. Creates a channel to send the input and receive the outcome. Returns /// an [`Outcome`] with the task output or an error. - pub fn apply(&self, input: W::Input) -> Outcome { - let (tx, rx) = outcome_channel(); - let task_id = self.send_one(input, Some(tx)); + pub fn apply>>(&self, input: I) -> Outcome { + let (tx, rx) = super::outcome_channel(); + let task_id = self.shared().send_one_global(input, Some(&tx)); + drop(tx); rx.recv().unwrap_or_else(|_| Outcome::Missing { task_id }) } /// Sends one `input` to the `Hive` for processing and returns its ID. The [`Outcome`] of /// the task will be sent to `tx` upon completion. - pub fn apply_send(&self, input: W::Input, tx: OutcomeSender) -> TaskId { - self.send_one(input, Some(tx)) + pub fn apply_send>, X>(&self, input: I, outcome_tx: X) -> TaskId + where + X: Borrow>, + { + self.shared() + .send_one_global(input, Some(outcome_tx.borrow())) } /// Sends one `input` to the `Hive` for processing and returns its ID immediately. The /// [`Outcome`] of the task will be retained and available for later retrieval. 
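+    ///
+    /// A usage sketch (stock `Caller` worker assumed, as in the tests below):
+    ///
+    /// ```no_run
+    /// use beekeeper::bee::stock::Caller;
+    /// use beekeeper::hive::{Builder, ChannelBuilder, TaskQueuesBuilder};
+    ///
+    /// let hive = ChannelBuilder::empty()
+    ///     .num_threads(2)
+    ///     .with_worker(Caller::from(|i: usize| i * 2))
+    ///     .build();
+    /// // the outcome is stored in the hive, keyed by the returned task ID
+    /// let task_id = hive.apply_store(21usize);
+    /// hive.join();
+    /// ```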
- pub fn apply_store(&self, input: W::Input) -> TaskId { - self.send_one(input, None) - } - - /// Sends a `batch` of inputs to the `Hive` for processing, and returns a `Vec` of their - /// task IDs. The [`Outcome`]s of the tasks are sent to the `outcome_tx` channel if provided, - /// otherwise they are retained in the `Hive` for later retrieval. - /// - /// The batch is provided as an [`ExactSizeIterator`], which enables the hive to reserve a - /// range of task IDs (a single atomic operation) rather than one at a time. - /// - /// This method is called by all the `swarm*` methods. - fn send_batch(&self, batch: T, outcome_tx: Option>) -> Vec - where - T: IntoIterator, - T::IntoIter: ExactSizeIterator, - { - #[cfg(debug_assertions)] - if self.max_workers() == 0 { - dbg!("WARNING: no worker threads are active for hive"); - } - let task_tx = self.task_tx(); - let iter = batch.into_iter(); - let (batch_size, _) = iter.size_hint(); - let shared = self.shared(); - let batch = shared.prepare_batch(batch_size, iter, outcome_tx); - if !self.is_poisoned() { - batch - .map(|task| { - let task_id = task.id(); - // try to send the task to the hive; if sending fails, convert the task into an - // `Unprocessed` outcome and try to send it to the outcome channel; if that - // fails, store the outcome in the hive - if let Err(err) = task_tx.send(task) { - shared.abandon_task(err.0); - } - task_id - }) - .collect() - } else { - // if the hive is poisoned, convert all tasks into `Unprocessed` outcomes and try to - // send them to their outcome channels or store them in the hive - self.shared().abandon_batch(batch) - } + pub fn apply_store>>(&self, input: I) -> TaskId { + self.shared().send_one_global(input, None) } /// Sends a `batch` of inputs to the `Hive` for processing, and returns an iterator over the @@ -185,13 +155,15 @@ impl> Hive { /// /// This method is more efficient than [`map`](Self::map) when the input is an /// [`ExactSizeIterator`]. - pub fn swarm(&self, batch: T) -> impl Iterator> + pub fn swarm(&self, batch: B) -> impl Iterator> + use where - T: IntoIterator, - T::IntoIter: ExactSizeIterator, + I: Into>, + B: IntoIterator, + B::IntoIter: ExactSizeIterator, { - let (tx, rx) = outcome_channel(); - let task_ids = self.send_batch(batch, Some(tx)); + let (tx, rx) = super::outcome_channel(); + let task_ids = self.shared().send_batch_global(batch, Some(&tx)); + drop(tx); rx.select_ordered(task_ids) } @@ -202,13 +174,17 @@ impl> Hive { /// instead receive the `Outcome`s in the order they were submitted. This method is more /// efficient than [`map_unordered`](Self::map_unordered) when the input is an /// [`ExactSizeIterator`]. - pub fn swarm_unordered(&self, batch: T) -> impl Iterator> + pub fn swarm_unordered( + &self, + batch: B, + ) -> impl Iterator> + use where - T: IntoIterator, - T::IntoIter: ExactSizeIterator, + I: Into>, + B: IntoIterator, + B::IntoIter: ExactSizeIterator, { - let (tx, rx) = outcome_channel(); - let task_ids = self.send_batch(batch, Some(tx)); + let (tx, rx) = super::outcome_channel(); + let task_ids = self.shared().send_batch_global(batch, Some(&tx)); rx.select_unordered(task_ids) } @@ -217,39 +193,45 @@ impl> Hive { /// /// This method is more efficient than [`map_send`](Self::map_send) when the input is an /// [`ExactSizeIterator`]. 
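+    ///
+    /// A usage sketch (stock `Caller` worker assumed):
+    ///
+    /// ```no_run
+    /// use beekeeper::bee::stock::Caller;
+    /// use beekeeper::hive::{outcome_channel, Builder, ChannelBuilder, TaskQueuesBuilder};
+    ///
+    /// let hive = ChannelBuilder::empty()
+    ///     .num_threads(4)
+    ///     .with_worker(Caller::from(|i: usize| i * 2))
+    ///     .build();
+    /// let (tx, rx) = outcome_channel();
+    /// // outcomes for these task IDs arrive on `rx` as tasks complete
+    /// let task_ids = hive.swarm_send(0..10usize, &tx);
+    /// ```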
- pub fn swarm_send(&self, batch: T, outcome_tx: OutcomeSender) -> Vec + pub fn swarm_send(&self, batch: B, outcome_tx: S) -> Vec where - T: IntoIterator, - T::IntoIter: ExactSizeIterator, + I: Into>, + B: IntoIterator, + B::IntoIter: ExactSizeIterator, + S: Borrow>, { - self.send_batch(batch, Some(outcome_tx)) + self.shared() + .send_batch_global(batch, Some(outcome_tx.borrow())) } /// Sends a `batch` of inputs to the `Hive` for processing, and returns a [`Vec`] of task IDs. /// The [`Outcome`]s of the task are retained and available for later retrieval. /// /// This method is more efficient than `map_store` when the input is an [`ExactSizeIterator`]. - pub fn swarm_store(&self, batch: T) -> Vec + pub fn swarm_store(&self, batch: B) -> Vec where - T: IntoIterator, - T::IntoIter: ExactSizeIterator, + I: Into>, + B: IntoIterator, + B::IntoIter: ExactSizeIterator, { - self.send_batch(batch, None) + self.shared().send_batch_global(batch, None) } /// Iterates over `inputs` and sends each one to the `Hive` for processing and returns an /// iterator over the [`Outcome`]s in the same order as the inputs. /// /// [`swarm`](Self::swarm) should be preferred when `inputs` is an [`ExactSizeIterator`]. - pub fn map( - &self, - inputs: impl IntoIterator, - ) -> impl Iterator> { - let (tx, rx) = outcome_channel(); - let task_ids: Vec<_> = inputs + pub fn map(&self, batch: B) -> impl Iterator> + use + where + I: Into>, + B: IntoIterator, + { + let (tx, rx) = super::outcome_channel(); + let task_ids: Vec<_> = batch .into_iter() - .map(|task| self.apply_send(task, tx.clone())) + .map(|task| self.apply_send(task, &tx)) .collect(); + drop(tx); rx.select_ordered(task_ids) } @@ -258,16 +240,21 @@ impl> Hive { /// /// [`swarm_unordered`](Self::swarm_unordered) should be preferred when `inputs` is an /// [`ExactSizeIterator`]. - pub fn map_unordered( + pub fn map_unordered( &self, - inputs: impl IntoIterator, - ) -> impl Iterator> { - let (tx, rx) = outcome_channel(); + batch: B, + ) -> impl Iterator> + use + where + I: Into>, + B: IntoIterator, + { + let (tx, rx) = super::outcome_channel(); // `map` is required (rather than `inspect`) because we need owned items - let task_ids: Vec<_> = inputs + let task_ids: Vec<_> = batch .into_iter() - .map(|task| self.apply_send(task, tx.clone())) + .map(|task| self.apply_send(task, &tx)) .collect(); + drop(tx); rx.select_unordered(task_ids) } @@ -276,14 +263,15 @@ impl> Hive { /// /// [`swarm_send`](Self::swarm_send) should be preferred when `inputs` is an /// [`ExactSizeIterator`]. - pub fn map_send( - &self, - inputs: impl IntoIterator, - tx: OutcomeSender, - ) -> Vec { - inputs + pub fn map_send(&self, batch: B, outcome_tx: X) -> Vec + where + I: Into>, + B: IntoIterator, + X: Borrow>, + { + batch .into_iter() - .map(|input| self.apply_send(input, tx.clone())) + .map(|input| self.apply_send(input, outcome_tx.borrow())) .collect() } @@ -292,8 +280,12 @@ impl> Hive { /// /// [`swarm_store`](Self::swarm_store) should be preferred when `inputs` is an /// [`ExactSizeIterator`]. - pub fn map_store(&self, inputs: impl IntoIterator) -> Vec { - inputs + pub fn map_store(&self, batch: B) -> Vec + where + I: Into>, + B: IntoIterator, + { + batch .into_iter() .map(|input| self.apply_store(input)) .collect() @@ -302,17 +294,15 @@ impl> Hive { /// Iterates over `items` and calls `f` with a mutable reference to a state value (initialized /// to `init`) and each item. `F` returns an input that is sent to the `Hive` for processing. 
/// Returns an [`OutcomeBatch`] of the outputs and the final state value. - pub fn scan( - &self, - items: impl IntoIterator, - init: St, - f: F, - ) -> (OutcomeBatch, St) + pub fn scan(&self, batch: B, init: S, f: F) -> (OutcomeBatch, S) where - F: FnMut(&mut St, T) -> W::Input, + B: IntoIterator, + O: Into>, + F: FnMut(&mut S, I) -> O, { - let (tx, rx) = outcome_channel(); - let (task_ids, fold_value) = self.scan_send(items, tx, init, f); + let (tx, rx) = super::outcome_channel(); + let (task_ids, fold_value) = self.scan_send(batch, &tx, init, f); + drop(tx); let outcomes = rx.select_unordered(task_ids).into(); (outcomes, fold_value) } @@ -321,26 +311,29 @@ impl> Hive { /// to `init`) and each item. `F` returns an input that is sent to the `Hive` for processing, /// or an error. Returns an [`OutcomeBatch`] of the outputs, a [`Vec`] of errors, and the final /// state value. - pub fn try_scan( + pub fn try_scan( &self, - items: impl IntoIterator, - init: St, + batch: B, + init: S, mut f: F, - ) -> (OutcomeBatch, Vec, St) + ) -> (OutcomeBatch, Vec, S) where - F: FnMut(&mut St, T) -> Result, + O: Into>, + B: IntoIterator, + F: FnMut(&mut S, I) -> Result, { - let (tx, rx) = outcome_channel(); - let (task_ids, errors, fold_value) = items.into_iter().fold( + let (tx, rx) = super::outcome_channel(); + let (task_ids, errors, fold_value) = batch.into_iter().fold( (Vec::new(), Vec::new(), init), |(mut task_ids, mut errors, mut acc), inp| { match f(&mut acc, inp) { - Ok(input) => task_ids.push(self.apply_send(input, tx.clone())), + Ok(input) => task_ids.push(self.apply_send(input, &tx)), Err(err) => errors.push(err), } (task_ids, errors, acc) }, ); + drop(tx); let outcomes = rx.select_unordered(task_ids).into(); (outcomes, errors, fold_value) } @@ -349,21 +342,24 @@ impl> Hive { /// to `init`) and each item. `f` returns an input that is sent to the `Hive` for processing. /// The outputs are sent to `tx` in the order they become available. Returns a [`Vec`] of the /// task IDs and the final state value. - pub fn scan_send( + pub fn scan_send( &self, - items: impl IntoIterator, - tx: OutcomeSender, - init: St, + batch: B, + outcome_tx: X, + init: S, mut f: F, - ) -> (Vec, St) + ) -> (Vec, S) where - F: FnMut(&mut St, T) -> W::Input, + O: Into>, + B: IntoIterator, + X: Borrow>, + F: FnMut(&mut S, I) -> O, { - items + batch .into_iter() .fold((Vec::new(), init), |(mut task_ids, mut acc), item| { let input = f(&mut acc, item); - task_ids.push(self.apply_send(input, tx.clone())); + task_ids.push(self.apply_send(input, outcome_tx.borrow())); (task_ids, acc) }) } @@ -373,20 +369,25 @@ impl> Hive { /// or an error. The outputs are sent to `tx` in the order they become available. This /// function returns the final state value and a [`Vec`] of results, where each result is /// either a task ID or an error. - pub fn try_scan_send( + pub fn try_scan_send( &self, - items: impl IntoIterator, - tx: OutcomeSender, - init: St, + batch: B, + outcome_tx: X, + init: S, mut f: F, - ) -> (Vec>, St) + ) -> (Vec>, S) where - F: FnMut(&mut St, T) -> Result, + O: Into>, + B: IntoIterator, + X: Borrow>, + F: FnMut(&mut S, I) -> Result, { - items + batch .into_iter() .fold((Vec::new(), init), |(mut results, mut acc), inp| { - results.push(f(&mut acc, inp).map(|input| self.apply_send(input, tx.clone()))); + results.push( + f(&mut acc, inp).map(|input| self.apply_send(input, outcome_tx.borrow())), + ); (results, acc) }) } @@ -395,16 +396,13 @@ impl> Hive { /// to `init`) and each item. 
`f` returns an input that is sent to the `Hive` for processing. /// This function returns the final state value and a [`Vec`] of task IDs. The [`Outcome`]s of /// the tasks are retained and available for later retrieval. - pub fn scan_store( - &self, - items: impl IntoIterator, - init: St, - mut f: F, - ) -> (Vec, St) + pub fn scan_store(&self, batch: B, init: S, mut f: F) -> (Vec, S) where - F: FnMut(&mut St, T) -> W::Input, + O: Into>, + B: IntoIterator, + F: FnMut(&mut S, I) -> O, { - items + batch .into_iter() .fold((Vec::new(), init), |(mut task_ids, mut acc), item| { let input = f(&mut acc, item); @@ -418,16 +416,18 @@ impl> Hive { /// or an error. This function returns the final value of the state value and a [`Vec`] of /// results, where each result is either a task ID or an error. The [`Outcome`]s of the /// tasks are retained and available for later retrieval. - pub fn try_scan_store( + pub fn try_scan_store( &self, - items: impl IntoIterator, - init: St, + batch: B, + init: S, mut f: F, - ) -> (Vec>, St) + ) -> (Vec>, S) where - F: FnMut(&mut St, T) -> Result, + O: Into>, + B: IntoIterator, + F: FnMut(&mut S, I) -> Result, { - items + batch .into_iter() .fold((Vec::new(), init), |(mut results, mut acc), item| { results.push(f(&mut acc, item).map(|input| self.apply_store(input))); @@ -440,27 +440,23 @@ impl> Hive { self.shared().wait_on_done(); } - /// Returns the [`MutexGuard`](parking_lot::MutexGuard) for the [`Queen`]. - /// - /// Note that the `Queen` will remain locked until the returned guard is dropped, and that - /// locking the `Queen` prevents new worker threads from being started. - pub fn queen(&self) -> impl Deref + '_ { - self.shared().queen.lock() + /// Returns a read-only reference to the [`Queen`]. + pub fn queen(&self) -> &Q { + self.shared().queen() } /// Returns the number of worker threads that have been requested, i.e., the maximum number of /// tasks that could be processed concurrently. This may be greater than - /// [`active_workers`](Self::active_workers) if any of the worker threads failed to start. + /// [`alive_workers`](Self::alive_workers) if any of the worker threads failed to start. pub fn max_workers(&self) -> usize { - self.shared().config.num_threads.get_or_default() + self.shared().num_threads() } /// Returns the number of worker threads that have been successfully started. This may be /// fewer than [`max_workers`](Self::max_workers) if any of the worker threads failed to start. pub fn alive_workers(&self) -> usize { self.shared() - .spawn_results - .lock() + .spawn_results() .iter() .filter(|result| result.is_ok()) .count() @@ -469,8 +465,7 @@ impl> Hive { /// Returns `true` if there are any "dead" worker threads that failed to spawn. pub fn has_dead_workers(&self) -> bool { self.shared() - .spawn_results - .lock() + .spawn_results() .iter() .any(|result| result.is_err()) } @@ -479,8 +474,7 @@ impl> Hive { /// successfully respawned. pub fn revive_workers(&self) -> usize { let shared = self.shared(); - shared - .respawn_dead_threads(|thread_index| Self::try_spawn(thread_index, Arc::clone(shared))) + shared.respawn_dead_threads(|thread_index| Self::try_spawn(thread_index, shared)) } /// Returns the number of tasks currently (queued for processing, being processed). @@ -490,15 +484,15 @@ impl> Hive { /// Returns the number of times one of this `Hive`'s worker threads has panicked. 
    pub fn num_panics(&self) -> usize {
-        self.shared().num_panics.get()
+        self.shared().num_panics()
     }
 
     /// Returns `true` if this `Hive` has been poisoned - i.e., its internal state has been
     /// corrupted such that it is no longer able to process tasks.
     ///
     /// Note that, when a `Hive` is poisoned, it is still possible to call methods that extract
-    /// its stored [`Outcome`]s (e.g., [`take_stored`](Self::take_stored)) or consume it (e.g.,
-    /// [`try_into_husk`](Self::try_into_husk)).
+    /// its stored [`Outcome`]s (e.g., [`remove_all`](crate::hive::OutcomeStore::remove_all)) or
+    /// consume it (e.g., [`try_into_husk`](Self::try_into_husk)).
     pub fn is_poisoned(&self) -> bool {
         self.shared().is_poisoned()
     }
@@ -522,15 +516,16 @@
     ///
     /// ```
     /// use beekeeper::bee::stock::{Thunk, ThunkWorker};
-    /// use beekeeper::hive::Builder;
+    /// use beekeeper::hive::prelude::*;
     /// use std::thread;
     /// use std::time::Duration;
     ///
     /// # fn main() {
-    /// let hive = Builder::new()
+    /// let hive = channel_builder(false)
     ///     .num_threads(4)
-    ///     .build_with_default::<ThunkWorker<()>>();
-    /// hive.map((0..10).map(|_| Thunk::of(|| thread::sleep(Duration::from_secs(3)))));
+    ///     .with_worker_default::<ThunkWorker<()>>()
+    ///     .build();
+    /// hive.map((0..10).map(|_| Thunk::from(|| thread::sleep(Duration::from_secs(3)))));
     /// thread::sleep(Duration::from_secs(1)); // Allow first set of tasks to be started.
     /// // There should be 4 active tasks and 6 queued tasks.
     /// hive.suspend();
@@ -553,6 +548,24 @@
         self.shared().set_suspended(false);
     }
 
+    /// Re-submits any unprocessed tasks for processing, with their results to be sent to
+    /// `outcome_tx`.
+    ///
+    /// Returns a [`Vec`] of task IDs that were submitted.
+    pub fn swarm_unprocessed_send<X: Borrow<OutcomeSender<W>>>(
+        &self,
+        outcome_tx: X,
+    ) -> Vec<TaskId> {
+        self.swarm_send(self.take_unprocessed_inputs(), outcome_tx)
+    }
+
+    /// Re-submits any unprocessed tasks for processing, with their results to be stored in the
+    /// hive.
+    ///
+    /// Returns a [`Vec`] of task IDs that were submitted.
+    pub fn swarm_unprocessed_store(&self) -> Vec<TaskId> {
+        self.swarm_store(self.take_unprocessed_inputs())
+    }
+
     /// Removes all `Unprocessed` outcomes from this `Hive` and returns them as an iterator over
     /// the input values.
     fn take_unprocessed_inputs(&self) -> impl ExactSizeIterator<Item = W::Input> {
@@ -565,29 +578,35 @@
             })
     }
 
-    /// If this `Hive` is suspended, resumes this `Hive` and re-submits any unprocessed tasks for
-    /// processing, with their results to be sent to `tx`. Returns a [`Vec`] of task IDs that
-    /// were resumed.
-    pub fn resume_send(&self, outcome_tx: OutcomeSender<W>) -> Vec<TaskId> {
-        self.shared()
-            .set_suspended(false)
-            .then(|| self.swarm_send(self.take_unprocessed_inputs(), outcome_tx))
-            .unwrap_or_default()
-    }
-
-    /// If this `Hive` is suspended, resumes this `Hive` and re-submit any unprocessed tasks for
-    /// processing, with their results to be stored in the queue. Returns a [`Vec`] of task IDs
-    /// that were resumed.
-    pub fn resume_store(&self) -> Vec<TaskId> {
-        self.shared()
-            .set_suspended(false)
-            .then(|| self.swarm_store(self.take_unprocessed_inputs()))
-            .unwrap_or_default()
+    /// Consumes this `Hive` and attempts to shut it down gracefully.
+    ///
+    /// If this `Hive` has been cloned, and those clones have not been dropped, this method returns
+    /// `false`.
+    ///
+    /// This closes the task queues so that no more tasks may be submitted.
+    /// If `urgent` is `true`, worker threads are also prevented from taking any more tasks
+    /// from the queues, and all queued tasks are converted to `Unprocessed` outcomes and sent
+    /// or discarded; otherwise, this method blocks while all queued tasks are processed.
+    ///
+    /// Note that it is not necessary to call this method explicitly - all resources are dropped
+    /// automatically when the last clone of the hive is dropped.
+    pub fn close(self, urgent: bool) -> bool {
+        self.try_close(urgent).is_some()
     }
 
-    /// Returns all stored outcomes as a [`HashMap`] of task IDs to `Outcome`s.
-    pub fn take_stored(&self) -> HashMap<TaskId, Outcome<W>> {
-        self.shared().take_outcomes()
+    /// Consumes this `Hive` and returns a map of stored outcomes.
+    ///
+    /// If this `Hive` has been cloned, and those clones have not been dropped, this method
+    /// returns `None` since it cannot take exclusive ownership of the internal shared data.
+    ///
+    /// This closes the task queues so that no more tasks may be submitted. If `urgent` is `true`,
+    /// worker threads are also prevented from taking any more tasks from the queues, and all
+    /// queued tasks are converted to `Unprocessed` outcomes and sent or stored; otherwise,
+    /// this method blocks while all queued tasks are processed.
+    ///
+    /// This method first joins on the `Hive` to wait for all tasks to finish.
+    pub fn try_into_outcomes(self, urgent: bool) -> Option<HashMap<TaskId, Outcome<W>>> {
+        self.try_close(urgent).map(|shared| shared.into_outcomes())
     }
 
     /// Consumes this `Hive` and attempts to return a [`Husk`] containing the remnants of this
@@ -597,85 +616,97 @@ impl> Hive {
     /// If this `Hive` has been cloned, and those clones have not been dropped, this method
     /// returns `None` since it cannot take exclusive ownership of the internal shared data.
     ///
+    /// This closes the task queues so that no more tasks may be submitted. If `urgent` is `true`,
+    /// worker threads are also prevented from taking any more tasks from the queues, and all
+    /// queued tasks are converted to `Unprocessed` outcomes and sent or stored; otherwise,
+    /// this method blocks while all queued tasks are processed.
+    ///
     /// This method first joins on the `Hive` to wait for all tasks to finish.
-    pub fn try_into_husk(mut self) -> Option<Husk<W, Q>> {
+    pub fn try_into_husk(self, urgent: bool) -> Option<Husk<Q>> {
+        self.try_close(urgent).map(|shared| shared.into_husk())
+    }
+
+    /// Consumes this `Hive` and attempts to acquire the shared data object.
+    ///
+    /// This closes the task queues so that no more tasks may be submitted. If `urgent` is `true`,
+    /// worker threads are also prevented from taking any more tasks from the queues; otherwise,
+    /// this method blocks while all queued tasks are processed.
+    ///
+    /// If this `Hive` has been cloned, and those clones have not been dropped, this method returns
+    /// `None`.
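+    ///
+    /// The public wrappers around this logic can be used as follows (a sketch; the worker
+    /// type is chosen only for illustration):
+    ///
+    /// ```no_run
+    /// use beekeeper::bee::stock::ThunkWorker;
+    /// use beekeeper::hive::{Builder, ChannelBuilder, TaskQueuesBuilder};
+    ///
+    /// let hive = ChannelBuilder::empty()
+    ///     .num_threads(2)
+    ///     .with_worker_default::<ThunkWorker<()>>()
+    ///     .build();
+    /// // `false` lets queued tasks finish; `true` abandons them as `Unprocessed`
+    /// let husk = hive.try_into_husk(false).unwrap();
+    /// ```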
+ fn try_close(mut self, urgent: bool) -> Option> { if self.shared().num_referrers() > 1 { return None; } // take the inner value and replace it with `None` - let inner = self.0.take().unwrap(); + let shared = self.0.take().unwrap(); + // close the global queue to prevent new tasks from being submitted + shared.close_task_queues(urgent); // wait for all tasks to finish - inner.shared.wait_on_done(); - // drop the task sender so receivers will drop automatically - drop(inner.task_tx); - // wait for worker threads to drop, then take ownership of the shared data and convert it - // into a Husk - let mut shared = inner.shared; - let mut backoff = None::; - loop { - // TODO: may want to have some timeout or other kind of limit to prevent this from - // looping forever if a worker thread somehow gets stuck, or if the `num_referrers` - // counter is corrupted - shared = match Arc::try_unwrap(shared) { - Ok(shared) => { - return Some(shared.try_into_husk()); - } - Err(shared) => { - backoff.get_or_insert_with(Backoff::new).spin(); - shared - } - }; - } + shared.wait_on_done(); + // unwrap the Arc and return the inner Shared value + Some( + super::util::unwrap_arc(shared) + .expect("timeout waiting to take ownership of shared data"), + ) + } + + #[inline] + fn shared(&self) -> &Arc> { + self.0.as_ref().unwrap() } } -impl Default for Hive> { +pub type DefaultHive = Hive, ChannelTaskQueues>; + +impl Default for DefaultHive { fn default() -> Self { - Builder::default().build_with_default::() + ChannelBuilder::default().with_worker_default().build() } } -impl> Clone for Hive { +impl Clone for Hive +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ /// Creates a shallow copy of this `Hive` containing references to its same internal state, /// i.e., all clones of a `Hive` submit tasks to the same shared worker thread pool. 
fn clone(&self) -> Self { - let inner = self.0.as_ref().unwrap(); - self.shared().referrer_is_cloning(); - Self(Some(inner.clone())) - } -} - -impl> Clone for HiveInner { - fn clone(&self) -> Self { - HiveInner { - task_tx: self.task_tx.clone(), - shared: Arc::clone(&self.shared), - } + let shared = self.0.as_ref().unwrap(); + shared.referrer_is_cloning(); + Self(Some(shared.clone())) } } -impl> Debug for Hive { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if let Some(inner) = self.0.as_ref() { - f.debug_struct("Hive") - .field("task_tx", &inner.task_tx) - .field("shared", &inner.shared) - .finish() - } else { - f.write_str("Hive {}") - } +impl PartialEq for Hive +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ + fn eq(&self, other: &Hive) -> bool { + let self_shared = self.shared(); + let other_shared = &other.shared(); + Arc::ptr_eq(self_shared, other_shared) } } -impl> PartialEq for Hive { - fn eq(&self, other: &Hive) -> bool { - Arc::ptr_eq(self.shared(), other.shared()) - } +impl Eq for Hive +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ } -impl> Eq for Hive {} - -impl> DerefOutcomes for Hive { +impl DerefOutcomes for Hive +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ #[inline] fn outcomes_deref(&self) -> impl Deref>> { self.shared().outcomes() @@ -687,90 +718,39 @@ impl> DerefOutcomes for Hive { } } -impl> Drop for Hive { +impl Drop for Hive +where + W: Worker, + Q: Queen, + T: TaskQueues, +{ fn drop(&mut self) { // if this Hive has already been turned into a Husk, it's inner value will be `None` - if let Some(inner) = self.0.as_ref() { + if let Some(shared) = self.0.as_ref() { // reduce the referrer count - let _ = inner.shared.referrer_is_dropping(); + let _ = shared.referrer_is_dropping(); // if this Hive is the only one with a pointer to the shared data, poison it // to prevent any worker threads that still have access to the shared data from // re-spawning. - if inner.shared.num_referrers() == 0 { - inner.shared.poison(); + if shared.num_referrers() == 0 { + shared.poison(); } } } } -/// Sentinel for a worker thread. Until the sentinel is cancelled, it will respawn the worker -/// thread if it panics. -struct Sentinel> { - thread_index: usize, - shared: Arc>, - active: bool, -} - -impl> Sentinel { - fn new(thread_index: usize, shared: Arc>) -> Self { - Self { - thread_index, - shared, - active: true, - } - } - - /// Cancel and destroy this sentinel. - fn cancel(mut self) { - self.active = false; - } -} - -impl> Drop for Sentinel { - fn drop(&mut self) { - if self.active { - // if the sentinel is active, that means the thread panicked during task execution, so - // we have to finish the task here before respawning - self.shared.finish_task(thread::panicking()); - // only respawn if the sentinel is active and the hive has not been poisoned - if !self.shared.is_poisoned() { - // can't do anything with the previous result - let _ = self - .shared - .respawn_thread(self.thread_index, |thread_index| { - Hive::try_spawn(thread_index, Arc::clone(&self.shared)) - }); - } - } - } -} - -#[cfg(not(feature = "affinity"))] -mod no_affinity { - use crate::bee::{Queen, Worker}; - use crate::hive::{Hive, Shared}; - - impl> Hive { - #[inline] - pub(super) fn init_thread(_: usize, _: &Shared) {} - } -} - #[cfg(feature = "affinity")] mod affinity { use crate::bee::{Queen, Worker}; use crate::hive::cores::Cores; - use crate::hive::{Hive, Poisoned, Shared}; - - impl> Hive { - /// Tries to pin the worker thread to a specific CPU core. 
- #[inline] - pub(super) fn init_thread(thread_index: usize, shared: &Shared) { - if let Some(core) = shared.get_core_affinity(thread_index) { - core.try_pin_current(); - } - } + use crate::hive::{Hive, Poisoned, TaskQueues}; + impl Hive + where + W: Worker, + Q: Queen, + T: TaskQueues, + { /// Attempts to increase the number of worker threads by `num_threads`. /// /// The provided `affinity` specifies additional CPU core indices to which the worker @@ -799,94 +779,109 @@ mod affinity { } } -#[cfg(feature = "batching")] -mod batching { +#[cfg(feature = "local-batch")] +mod local_batch { use crate::bee::{Queen, Worker}; - use crate::hive::Hive; + use crate::hive::{Hive, TaskQueues}; - impl> Hive { - /// Returns the batch size for worker threads. - pub fn worker_batch_size(&self) -> usize { - self.shared().batch_size() + impl Hive + where + W: Worker, + Q: Queen, + T: TaskQueues, + { + /// Returns the batch limit for worker threads. + pub fn worker_batch_limit(&self) -> usize { + self.shared().worker_batch_limit() } - /// Sets the batch size for worker threads. This will block the current thread until all - /// worker thread queues can be resized. - pub fn set_worker_batch_size(&self, batch_size: usize) { - self.shared().set_batch_size(batch_size); + /// Sets the batch limit for worker threads. + /// + /// Depending on this hive's `TaskQueues` implementation, this method may: + /// * have no effect (if it does not support local batching) + /// * block the current thread until all worker thread queues can be resized. + pub fn set_worker_batch_limit(&self, batch_limit: usize) { + self.shared().set_worker_batch_limit(batch_limit); } - } -} -#[cfg(not(feature = "retry"))] -mod no_retry { - use crate::bee::{Queen, Worker}; - use crate::hive::{Hive, Outcome, Shared, Task}; - - impl> Hive { - #[inline] - pub(super) fn execute( - task: Task, - _thread_index: usize, - worker: &mut W, - shared: &Shared, - ) { - let (input, ctx, outcome_tx) = task.into_parts(); - let result = worker.apply(input, &ctx); - let outcome = Outcome::from_worker_result(result, ctx.task_id()); - shared.send_or_store_outcome(outcome, outcome_tx); + /// Returns the weight limit for worker threads. + pub fn worker_weight_limit(&self) -> u64 { + self.shared().worker_weight_limit() + } + + /// Sets the weight limit for worker threads. + /// + /// Depending on this hive's `TaskQueues` implementation, this method may have no effect + /// (if it does not support local batching). + pub fn set_worker_weight_limit(&self, weight_limit: u64) { + self.shared().set_worker_weight_limit(weight_limit); } } } #[cfg(feature = "retry")] mod retry { - use crate::bee::{ApplyError, Queen, Worker}; - use crate::hive::{Hive, Outcome, Shared, Task}; - - impl> Hive { - #[inline] - pub(super) fn execute( - task: Task, - thread_index: usize, - worker: &mut W, - shared: &Shared, - ) { - let (input, mut ctx, outcome_tx) = task.into_parts(); - match worker.apply(input, &ctx) { - Err(ApplyError::Retryable { input, .. }) if shared.can_retry(&ctx) => { - ctx.inc_attempt(); - shared.queue_retry(thread_index, input, ctx, outcome_tx); - } - result => { - let outcome = Outcome::from_worker_result(result, ctx.task_id()); - shared.send_or_store_outcome(outcome, outcome_tx); - } - } + use crate::bee::{Queen, Worker}; + use crate::hive::{Hive, TaskQueues}; + use std::time::Duration; + + impl Hive + where + W: Worker, + Q: Queen, + T: TaskQueues, + { + /// Returns the current retry limit for this hive. 
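+        ///
+        /// A usage sketch (requires the `retry` feature; builder and worker types assumed):
+        ///
+        /// ```no_run
+        /// use beekeeper::bee::stock::ThunkWorker;
+        /// use beekeeper::hive::{ChannelBuilder, TaskQueuesBuilder};
+        ///
+        /// let hive = ChannelBuilder::empty()
+        ///     .with_worker_default::<ThunkWorker<()>>()
+        ///     .build();
+        /// // allow each task up to 3 retries before its error becomes fatal
+        /// hive.set_worker_retry_limit(3);
+        /// assert_eq!(hive.worker_retry_limit(), 3);
+        /// ```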
+ pub fn worker_retry_limit(&self) -> u8 { + self.shared().worker_retry_limit() + } + + /// Updates the retry limit for this hive and returns the previous value. + pub fn set_worker_retry_limit(&self, limit: u8) -> u8 { + self.shared().set_worker_retry_limit(limit) + } + + /// Returns the current retry factor for this hive. + pub fn worker_retry_factor(&self) -> Duration { + self.shared().worker_retry_factor() + } + + /// Updates the retry factor for this hive and returns the previous value. + pub fn set_worker_retry_factor(&self, duration: Duration) -> Duration { + self.shared().set_worker_retry_factor(duration) } } } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::Poisoned; use crate::bee::stock::{Caller, Thunk, ThunkWorker}; - use crate::hive::{outcome_channel, Builder, Outcome, OutcomeIteratorExt}; + use crate::hive::{ + Builder, ChannelBuilder, Outcome, OutcomeIteratorExt, TaskQueuesBuilder, channel_builder, + outcome_channel, + }; use std::collections::HashMap; use std::thread; use std::time::Duration; #[test] - fn test_suspend() { - let hive = Builder::new() + fn test_suspend_resume() { + let hive = channel_builder(false) .num_threads(4) - .build_with_default::>(); - let outcome_iter = - hive.map((0..10).map(|_| Thunk::of(|| thread::sleep(Duration::from_secs(3))))); + .with_worker_default::>() + .build(); + let (tx, rx) = outcome_channel(); + hive.map_send( + (0..10).map(|_| Thunk::from(|| thread::sleep(Duration::from_secs(3)))), + tx, + ); // Allow first set of tasks to be started. thread::sleep(Duration::from_secs(1)); // There should be 4 active tasks and 6 queued tasks. hive.suspend(); + assert!(hive.is_suspended()); assert_eq!(hive.num_tasks(), (6, 4)); // Wait for active tasks to complete. hive.join(); @@ -895,19 +890,21 @@ mod tests { // Wait for remaining tasks to complete. 
hive.join(); assert_eq!(hive.num_tasks(), (0, 0)); - let outputs: Vec<_> = outcome_iter.into_outputs().collect(); + let outputs: Vec<_> = rx.into_outputs().collect(); assert_eq!(outputs.len(), 10); } #[test] fn test_spawn_after_poison() { - let hive = Builder::new() + let hive = ChannelBuilder::empty() .num_threads(4) - .build_with_default::>(); + .with_worker_default::>() + .build(); assert_eq!(hive.max_workers(), 4); assert_eq!(hive.alive_workers(), 4); // poison hive using private method - hive.shared().poison(); + hive.0.as_ref().unwrap().poison(); + assert!(hive.is_poisoned()); // attempt to spawn a new task assert!(matches!(hive.grow(1), Err(Poisoned))); // make sure the worker count wasn't increased @@ -917,15 +914,16 @@ mod tests { #[test] fn test_apply_after_poison() { - let hive = Builder::new() + let hive = ChannelBuilder::empty() .num_threads(4) - .build_with(Caller::of(|i: usize| i * 2)); + .with_worker(Caller::from(|i: usize| i * 2)) + .build(); // poison hive using private method - hive.shared().poison(); + hive.0.as_ref().unwrap().poison(); // submit a task, check that it comes back unprocessed let (tx, rx) = outcome_channel(); let sent_input = 1; - let sent_task_id = hive.apply_send(sent_input, tx.clone()); + let sent_task_id = hive.apply_send(sent_input, &tx); let outcome = rx.recv().unwrap(); match outcome { Outcome::Unprocessed { input, task_id } => { @@ -938,16 +936,17 @@ mod tests { #[test] fn test_swarm_after_poison() { - let hive = Builder::new() + let hive = ChannelBuilder::empty() .num_threads(4) - .build_with(Caller::of(|i: usize| i * 2)); + .with_worker(Caller::from(|i: usize| i * 2)) + .build(); // poison hive using private method - hive.shared().poison(); + hive.0.as_ref().unwrap().poison(); // submit a task, check that it comes back unprocessed let (tx, rx) = outcome_channel(); let inputs = 0..10; let task_ids: HashMap = hive - .swarm_send(inputs.clone(), tx) + .swarm_send(inputs.clone(), &tx) .into_iter() .zip(inputs) .collect(); diff --git a/src/hive/husk.rs b/src/hive/husk.rs index 58ee534..6bccb3b 100644 --- a/src/hive/husk.rs +++ b/src/hive/husk.rs @@ -1,8 +1,10 @@ use super::{ - Builder, Config, DerefOutcomes, Hive, Outcome, OutcomeBatch, OutcomeSender, OutcomeStore, - OwnedOutcomes, + Config, DerefOutcomes, Hive, OpenBuilder, Outcome, OutcomeBatch, OutcomeSender, OutcomeStore, + OwnedOutcomes, TaskQueues, }; use crate::bee::{Queen, TaskId, Worker}; +use derive_more::Debug; +use std::any; use std::collections::HashMap; use std::ops::{Deref, DerefMut}; @@ -10,20 +12,23 @@ use std::ops::{Deref, DerefMut}; /// /// Provides access to the `Queen` and to stored `Outcome`s. Can be used to create a new `Hive` /// based on the previous `Hive`'s configuration. -pub struct Husk> { +#[derive(Debug)] +pub struct Husk { config: Config, + #[debug("{}", any::type_name::())] queen: Q, num_panics: usize, - outcomes: HashMap>, + #[debug(skip)] + outcomes: HashMap>, } -impl> Husk { +impl Husk { /// Creates a new `Husk`. Should only be called from `Shared::try_into_husk`. pub(super) fn new( config: Config, queen: Q, num_panics: usize, - outcomes: HashMap>, + outcomes: HashMap>, ) -> Self { Self { config, @@ -44,20 +49,23 @@ impl> Husk { } /// Consumes this `Husk` and returns the `Queen` and `Outcome`s. - pub fn into_parts(self) -> (Q, OutcomeBatch) { + pub fn into_parts(self) -> (Q, OutcomeBatch) { (self.queen, OutcomeBatch::new(self.outcomes)) } /// Returns a new `Builder` that will create a `Hive` with the same configuration as the one /// that produced this `Husk`. 
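+    ///
+    /// Together with [`into_hive`](Self::into_hive), this allows a `Husk` to be turned back
+    /// into a working `Hive`. A sketch (queen and task-queues types chosen for illustration):
+    ///
+    /// ```no_run
+    /// use beekeeper::bee::{stock::EchoWorker, CloneQueen};
+    /// use beekeeper::hive::{ChannelTaskQueues, Husk};
+    ///
+    /// fn rebuild(husk: Husk<CloneQueen<EchoWorker<usize>>>) {
+    ///     let _hive = husk.into_hive::<ChannelTaskQueues<EchoWorker<usize>>>();
+    /// }
+    /// ```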
- pub fn as_builder(&self) -> Builder { - self.config.clone().into() + pub fn as_builder(&self) -> OpenBuilder { + OpenBuilder::from(self.config.clone()) } /// Consumes this `Husk` and returns a new `Hive` with the same configuration and `Queen` as /// the one that produced this `Husk`. - pub fn into_hive(self) -> Hive { - self.as_builder().build(self.queen) + pub fn into_hive>(self) -> Hive { + self.as_builder() + .with_queen(self.queen) + .with_queues::() + .build() } /// Consumes this `Husk` and creates a new `Hive` with the same configuration as the one that @@ -65,16 +73,20 @@ impl> Husk { /// be sent to `tx`. Returns the new `Hive` and the IDs of the tasks that were queued. /// /// This method returns a `SpawnError` if there is an error creating the new `Hive`. - pub fn into_hive_swarm_send_unprocessed( + pub fn into_hive_swarm_send_unprocessed>( mut self, - tx: OutcomeSender, - ) -> (Hive, Vec) { + tx: &OutcomeSender, + ) -> (Hive, Vec) { let unprocessed: Vec<_> = self .remove_all_unprocessed() .into_iter() .map(|(_, input)| input) .collect(); - let hive = self.as_builder().build(self.queen); + let hive = self + .as_builder() + .with_queen(self.queen) + .with_queues::() + .build(); let task_ids = hive.swarm_send(unprocessed, tx); (hive, task_ids) } @@ -85,57 +97,69 @@ impl> Husk { /// of the tasks that were queued. /// /// This method returns a `SpawnError` if there is an error creating the new `Hive`. - pub fn into_hive_swarm_store_unprocessed(mut self) -> (Hive, Vec) { + pub fn into_hive_swarm_store_unprocessed>( + mut self, + ) -> (Hive, Vec) { let unprocessed: Vec<_> = self .remove_all_unprocessed() .into_iter() .map(|(_, input)| input) .collect(); - let hive = self.as_builder().build(self.queen); + let hive = self + .as_builder() + .with_queen(self.queen) + .with_queues::() + .build(); let task_ids = hive.swarm_store(unprocessed); (hive, task_ids) } } -impl> DerefOutcomes for Husk { +impl> DerefOutcomes for Husk { #[inline] - fn outcomes_deref(&self) -> impl Deref>> { + fn outcomes_deref(&self) -> impl Deref>> { &self.outcomes } #[inline] - fn outcomes_deref_mut(&mut self) -> impl DerefMut>> { + fn outcomes_deref_mut(&mut self) -> impl DerefMut>> { &mut self.outcomes } } -impl> OwnedOutcomes for Husk { +impl> OwnedOutcomes for Husk { #[inline] - fn outcomes(self) -> HashMap> { + fn outcomes(self) -> HashMap> { self.outcomes } #[inline] - fn outcomes_ref(&self) -> &HashMap> { + fn outcomes_ref(&self) -> &HashMap> { &self.outcomes } } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use crate::bee::stock::{PunkWorker, Thunk, ThunkWorker}; - use crate::hive::{outcome_channel, Builder, Outcome, OutcomeIteratorExt, OutcomeStore}; + use crate::hive::ChannelTaskQueues; + use crate::hive::{ + Builder, ChannelBuilder, Outcome, OutcomeIteratorExt, OutcomeStore, TaskQueuesBuilder, + outcome_channel, + }; #[test] fn test_unprocessed() { // don't spin up any worker threads so that no tasks will be processed - let hive = Builder::new() + let hive = ChannelBuilder::empty() .num_threads(0) - .build_with_default::>(); - let mut task_ids = hive.map_store((0..10).map(|i| Thunk::of(move || i))); + .with_worker_default::>() + .build(); + let mut task_ids = hive.map_store((0..10).map(|i| Thunk::from(move || i))); // cancel and smash the hive before the tasks can be processed hive.suspend(); - let mut husk = hive.try_into_husk().unwrap(); + let mut husk = hive.try_into_husk(false).unwrap(); assert!(husk.has_unprocessed()); for i in task_ids.iter() { 
assert!(husk.get(*i).unwrap().is_unprocessed()); @@ -154,18 +178,19 @@ mod tests { #[test] fn test_reprocess_unprocessed() { // don't spin up any worker threads so that no tasks will be processed - let hive1 = Builder::new() + let hive1 = ChannelBuilder::empty() .num_threads(0) - .build_with_default::>(); - let _ = hive1.map_store((0..10).map(|i| Thunk::of(move || i))); + .with_worker_default::>() + .build(); + let _ = hive1.map_store((0..10).map(|i| Thunk::from(move || i))); // cancel and smash the hive before the tasks can be processed hive1.suspend(); - let husk1 = hive1.try_into_husk().unwrap(); - let (hive2, _) = husk1.into_hive_swarm_store_unprocessed(); + let husk1 = hive1.try_into_husk(false).unwrap(); + let (hive2, _) = husk1.into_hive_swarm_store_unprocessed::>(); // now spin up worker threads to process the tasks hive2.grow(8).expect("error spawning threads"); hive2.join(); - let husk2 = hive2.try_into_husk().unwrap(); + let husk2 = hive2.try_into_husk(false).unwrap(); assert!(!husk2.has_unprocessed()); assert!(husk2.has_successes()); assert_eq!(husk2.iter_successes().count(), 10); @@ -174,19 +199,20 @@ mod tests { #[test] fn test_reprocess_unprocessed_to() { // don't spin up any worker threads so that no tasks will be processed - let hive1 = Builder::new() + let hive1 = ChannelBuilder::empty() .num_threads(0) - .build_with_default::>(); - let _ = hive1.map_store((0..10).map(|i| Thunk::of(move || i))); + .with_worker_default::>() + .build(); + let _ = hive1.map_store((0..10).map(|i| Thunk::from(move || i))); // cancel and smash the hive before the tasks can be processed hive1.suspend(); - let husk1 = hive1.try_into_husk().unwrap(); + let husk1 = hive1.try_into_husk(false).unwrap(); let (tx, rx) = outcome_channel(); - let (hive2, task_ids) = husk1.into_hive_swarm_send_unprocessed(tx); + let (hive2, task_ids) = husk1.into_hive_swarm_send_unprocessed::>(&tx); // now spin up worker threads to process the tasks hive2.grow(8).expect("error spawning threads"); hive2.join(); - let husk2 = hive2.try_into_husk().unwrap(); + let husk2 = hive2.try_into_husk(false).unwrap(); assert!(husk2.is_empty()); let mut outputs = rx .select_ordered(task_ids) @@ -198,12 +224,13 @@ mod tests { #[test] fn test_into_result() { - let hive = Builder::new() + let hive = ChannelBuilder::empty() .num_threads(4) - .build_with_default::>(); - hive.map_store((0..10).map(|i| Thunk::of(move || i))); + .with_worker_default::>() + .build(); + hive.map_store((0..10).map(|i| Thunk::from(move || i))); hive.join(); - let mut outputs = hive.try_into_husk().unwrap().into_parts().1.unwrap(); + let mut outputs = hive.try_into_husk(false).unwrap().into_parts().1.unwrap(); outputs.sort(); assert_eq!(outputs, (0..10).collect::>()); } @@ -211,14 +238,15 @@ mod tests { #[test] #[should_panic] fn test_into_result_panic() { - let hive = Builder::new() + let hive = ChannelBuilder::empty() .num_threads(4) - .build_with_default::>(); + .with_worker_default::>() + .build(); hive.map_store( - (0..10).map(|i| Thunk::of(move || if i == 5 { panic!("oh no!") } else { i })), + (0..10).map(|i| Thunk::from(move || if i == 5 { panic!("oh no!") } else { i })), ); hive.join(); - let (_, result) = hive.try_into_husk().unwrap().into_parts(); + let (_, result) = hive.try_into_husk(false).unwrap().into_parts(); let _ = result.ok_or_unwrap_errors(true); } } diff --git a/src/hive/inner/builder.rs b/src/hive/inner/builder.rs new file mode 100644 index 0000000..aff5f0a --- /dev/null +++ b/src/hive/inner/builder.rs @@ -0,0 +1,526 @@ +use super::{Config, 
Token}; + +/// Private (sealed) trait depended on by `Builder` that must be implemented by builder types. +pub trait BuilderConfig { + /// Returns a reference to the underlying `Config`. + fn config_ref(&mut self, token: Token) -> &mut Config; +} + +/// Trait that provides `Builder` types with methods for setting configuration parameters. +/// There are multiple `Builder` implementations. See the +/// [module documentation](crate::hive::builder) for more details. +/// +#[doc = simple_mermaid::mermaid!("diagram.mmd")] +/// +/// This is a sealed trait, meaning it cannot be implemented outside of this crate. +pub trait Builder: BuilderConfig + Sized { + /// Sets the maximum number of worker threads that will be alive at any given moment in the + /// built [`Hive`](crate::hive::Hive). If not specified, the built `Hive` will not be + /// initialized with worker threads until [`Hive::grow`](crate::hive::Hive::grow) is called. + /// + /// # Examples + /// + /// No more than eight threads will be alive simultaneously for this hive: + /// + /// ``` + /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; + /// use beekeeper::hive::prelude::*; + /// + /// # fn main() { + /// let hive = channel_builder(false) + /// .num_threads(8) + /// .with_worker_default::>() + /// .build(); + /// + /// for _ in 0..100 { + /// hive.apply_store(Thunk::from(|| { + /// println!("Hello from a worker thread!") + /// })); + /// } + /// # } + /// ``` + fn num_threads(mut self, num: usize) -> Self { + let _ = self.config_ref(Token).num_threads.set(Some(num)); + self + } + + /// Sets the number of worker threads to the global default value. + fn with_default_num_threads(mut self) -> Self { + let _ = self + .config_ref(Token) + .num_threads + .set(super::config::DEFAULTS.lock().num_threads.get()); + self + } + + /// Specifies that the built [`Hive`](crate::hive::Hive) will use all available CPU cores for + /// worker threads. + /// + /// # Examples + /// + /// All available threads will be alive simultaneously for this hive: + /// + /// ``` + /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; + /// use beekeeper::hive::prelude::*; + /// + /// # fn main() { + /// let hive = channel_builder(false) + /// .with_thread_per_core() + /// .with_worker_default::>() + /// .build(); + /// + /// for _ in 0..100 { + /// hive.apply_store(Thunk::from(|| { + /// println!("Hello from a worker thread!") + /// })); + /// } + /// # } + /// ``` + fn with_thread_per_core(mut self) -> Self { + let _ = self + .config_ref(Token) + .num_threads + .set(Some(num_cpus::get())); + self + } + + /// Sets the thread name for each of the threads spawned by the built + /// [`Hive`](crate::hive::Hive). If not specified, threads spawned by the thread pool will be + /// unnamed. 
+ /// + /// # Examples + /// + /// Each thread spawned by this hive will have the name `"foo"`: + /// + /// ``` + /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; + /// use beekeeper::hive::prelude::*; + /// use std::thread; + /// + /// # fn main() { + /// let hive = channel_builder(true) + /// .thread_name("foo") + /// .with_worker_default::<ThunkWorker<()>>() + /// .build(); + /// + /// for _ in 0..100 { + /// hive.apply_store(Thunk::from(|| { + /// assert_eq!(thread::current().name(), Some("foo")); + /// })); + /// } + /// # hive.join(); + /// # } + /// ``` + fn thread_name<T: Into<String>>(mut self, name: T) -> Self { + let _ = self.config_ref(Token).thread_name.set(Some(name.into())); + self + } + + /// Sets the stack size (in bytes) for each of the threads spawned by the built + /// [`Hive`](crate::hive::Hive). If not specified, threads spawned by the hive will have a + /// stack size [as specified in the `std::thread` documentation][thread]. + /// + /// [thread]: https://doc.rust-lang.org/nightly/std/thread/index.html#stack-size + /// + /// # Examples + /// + /// Each thread spawned by this hive will have a 4 MB stack: + /// + /// ``` + /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; + /// use beekeeper::hive::prelude::*; + /// + /// # fn main() { + /// let hive = channel_builder(true) + /// .thread_stack_size(4_000_000) + /// .with_worker_default::<ThunkWorker<()>>() + /// .build(); + /// + /// for _ in 0..100 { + /// hive.apply_store(Thunk::from(|| { + /// println!("This thread has a 4 MB stack size!"); + /// })); + /// } + /// # hive.join(); + /// # } + /// ``` + fn thread_stack_size(mut self, size: usize) -> Self { + let _ = self.config_ref(Token).thread_stack_size.set(Some(size)); + self + } + + /// Sets the list of CPU core indices to which threads in the `Hive` should be pinned. + /// + /// Core indices are integers in the range `0..N`, where `N` is the number of available CPU + /// cores as reported by [`num_cpus::get()`]. The mapping between core indices and core IDs + /// is platform-specific. All CPU cores on a given system should be equivalent, and thus it + /// does not matter which cores are pinned so long as a core is not pinned to multiple + /// threads. + /// + /// Excess core indices (i.e., if `affinity.len() > num_threads`) are ignored. If + /// `affinity.len() < num_threads` then the excess threads will not be pinned. + /// + /// # Examples + /// + /// Each thread spawned by this hive will be pinned to a core: + /// + /// ``` + /// use beekeeper::bee::stock::{Thunk, ThunkWorker}; + /// use beekeeper::hive::prelude::*; + /// + /// # fn main() { + /// let hive = channel_builder(false) + /// .num_threads(4) + /// .core_affinity(0..4) + /// .with_worker_default::<ThunkWorker<()>>() + /// .build(); + /// + /// for _ in 0..100 { + /// hive.apply_store(Thunk::from(|| { + /// println!("This thread is pinned!"); + /// })); + /// } + /// # hive.join(); + /// # } + /// ``` + #[cfg(feature = "affinity")] + fn core_affinity<C: Into<crate::hive::cores::Cores>>(mut self, affinity: C) -> Self { + let _ = self.config_ref(Token).affinity.set(Some(affinity.into())); + self + } + + /// Specifies that worker threads should be pinned to all available CPU cores. If + /// `num_threads` is greater than the available number of CPU cores, then some threads + /// might not be pinned. + #[cfg(feature = "affinity")] + fn with_default_core_affinity(mut self) -> Self { + let _ = self + .config_ref(Token) + .affinity + .set(Some(crate::hive::cores::Cores::all())); + self + } + + /// Sets the worker thread batch size.
+ /// + /// This may have no effect if the `TaskQueues` implementation used for this hive does not + /// support local batching. + /// + /// If `batch_limit` is `0`, local batching is effectively disabled, but note that the + /// performance may be worse than with the `local-batch` feature disabled. + #[cfg(feature = "local-batch")] + fn batch_limit(mut self, batch_limit: usize) -> Self { + if batch_limit == 0 { + self.config_ref(Token).batch_limit.set(None); + } else { + self.config_ref(Token).batch_limit.set(Some(batch_limit)); + } + self + } + + /// Sets the worker thread batch size to the global default value. + #[cfg(feature = "local-batch")] + fn with_default_batch_limit(mut self) -> Self { + let _ = self + .config_ref(Token) + .batch_limit + .set(super::config::DEFAULTS.lock().batch_limit.get()); + self + } + + /// Sets the maximum weight of the tasks a worker thread can have at any given time. + /// + /// If `weight_limit` is `0`, weighting is effectively disabled, but note that the performance + /// may be worse than with the `local-batch` feature disabled. + /// + /// If a task has a weight greater than the limit, it is immediately converted to + /// `Outcome::WeightLimitExceeded` and sent or stored. + /// + /// If the `local-batch` feature is enabled, this limit determines the maximum total "weight" of + /// active and pending tasks in the worker's local queue. + #[cfg(feature = "local-batch")] + fn weight_limit(mut self, weight_limit: u64) -> Self { + if weight_limit == 0 { + self.config_ref(Token).weight_limit.set(None); + } else { + self.config_ref(Token).weight_limit.set(Some(weight_limit)); + } + self + } + + /// Sets the worker thread weight limit to the global default value. + #[cfg(feature = "local-batch")] + fn with_default_weight_limit(mut self) -> Self { + let _ = self + .config_ref(Token) + .weight_limit + .set(super::config::DEFAULTS.lock().weight_limit.get()); + self + } + + /// Disables local batching. + #[cfg(feature = "local-batch")] + fn with_no_local_batching(self) -> Self { + self.batch_limit(0).weight_limit(0) + } + + /// Sets the maximum number of times to retry an + /// [`ApplyError::Retryable`](crate::bee::ApplyError::Retryable) error. A worker + /// thread will retry a task until it either returns + /// [`ApplyError::Fatal`](crate::bee::ApplyError::Fatal) or the maximum number of retries is + /// reached. Each time a task is retried, the worker thread will first sleep for + /// `retry_factor * (2 ** (attempt - 1))` before attempting the task again. If not + /// specified, tasks are retried a default number of times. If set to `0`, retrying is disabled.
+ /// + /// # Examples + /// + /// ``` + /// use beekeeper::bee::{ApplyError, Context}; + /// use beekeeper::bee::stock::RetryCaller; + /// use beekeeper::hive::prelude::*; + /// use std::time; + /// + /// fn sometimes_fail( + /// i: usize, + /// _: &Context<usize> + /// ) -> Result<String, ApplyError<usize, String>> { + /// match i % 3 { + /// 0 => Ok("Success".into()), + /// 1 => Err(ApplyError::Retryable { input: i, error: "Retryable".into() }), + /// 2 => Err(ApplyError::Fatal { input: Some(i), error: "Fatal".into() }), + /// _ => unreachable!(), + /// } + /// } + /// + /// # fn main() { + /// let hive = channel_builder(true) + /// .max_retries(3) + /// .with_worker(RetryCaller::from(sometimes_fail)) + /// .build(); + /// + /// for i in 0..10 { + /// hive.apply_store(i); + /// } + /// # hive.join(); + /// # } + /// ``` + #[cfg(feature = "retry")] + fn max_retries(mut self, limit: u8) -> Self { + let _ = if limit == 0 { + self.config_ref(Token).max_retries.set(None) + } else { + self.config_ref(Token).max_retries.set(Some(limit)) + }; + self + } + + /// Sets the exponential back-off factor for retrying tasks. Each time a task is retried, + /// the thread will first sleep for `retry_factor * (2 ** (attempt - 1))`. If not + /// specified, a default retry factor is used. Set to + /// [`Duration::ZERO`](std::time::Duration::ZERO) to disable exponential backoff. + /// + /// # Examples + /// + /// ``` + /// use beekeeper::bee::{ApplyError, Context}; + /// use beekeeper::bee::stock::RetryCaller; + /// use beekeeper::hive::prelude::*; + /// use std::time; + /// + /// fn echo_time(i: usize, ctx: &Context<usize>) -> Result<String, ApplyError<usize, String>> { + /// let attempt = ctx.attempt(); + /// if attempt == 3 { + /// Ok("Success".into()) + /// } else { + /// // the delay between each message should be exponential + /// println!("Task {} attempt {}: {:?}", i, attempt, time::SystemTime::now()); + /// Err(ApplyError::Retryable { input: i, error: "Retryable".into() }) + /// } + /// } + /// + /// # fn main() { + /// let hive = channel_builder(true) + /// .max_retries(3) + /// .retry_factor(time::Duration::from_secs(1)) + /// .with_worker(RetryCaller::from(echo_time)) + /// .build(); + /// + /// for i in 0..10 { + /// hive.apply_store(i); + /// } + /// # hive.join(); + /// # } + /// ``` + #[cfg(feature = "retry")] + fn retry_factor(mut self, duration: std::time::Duration) -> Self { + if duration == std::time::Duration::ZERO { + let _ = self.config_ref(Token).retry_factor.set(None); + } else { + let _ = self.config_ref(Token).set_retry_factor_from(duration); + }; + self + } + + /// Sets the maximum number of retries to the global default value. + #[cfg(feature = "retry")] + fn with_default_max_retries(mut self) -> Self { + let _ = self + .config_ref(Token) + .max_retries + .set(super::config::DEFAULTS.lock().max_retries.get()); + + self + } + + /// Sets the retry factor to the global default value. + #[cfg(feature = "retry")] + fn with_default_retry_factor(mut self) -> Self { + let _ = self + .config_ref(Token) + .retry_factor + .set(super::config::DEFAULTS.lock().retry_factor.get()); + self + } + + /// Disables retrying tasks.
+ #[cfg(feature = "retry")] + fn with_no_retries(self) -> Self { + self.max_retries(0).retry_factor(std::time::Duration::ZERO) + } +} + +impl Builder for B {} + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::*; + pub struct TestBuilder(Config); + + impl TestBuilder { + pub fn empty() -> Self { + TestBuilder(Config::empty()) + } + } + + impl BuilderConfig for TestBuilder { + fn config_ref(&mut self, _: Token) -> &mut Config { + &mut self.0 + } + } + + #[test] + fn test_common() { + let mut builder = TestBuilder::empty() + .num_threads(4) + .thread_name("foo") + .thread_stack_size(100); + crate::hive::inner::builder_test_utils::check_builder(&mut builder); + } +} + +#[cfg(all(test, feature = "affinity"))] +#[cfg_attr(coverage_nightly, coverage(off))] +mod affinity_tests { + use super::tests::TestBuilder; + use super::*; + use crate::hive::cores::Cores; + + #[test] + fn test_core_affinity() { + let mut builder = TestBuilder::empty(); + builder = builder.core_affinity(Cores::first(4)); + assert_eq!( + builder.config_ref(Token).affinity.get(), + Some((0..4).into()) + ); + } + + #[test] + fn test_with_default_core_affinity() { + let mut builder = TestBuilder::empty(); + builder = builder.with_default_core_affinity(); + assert_eq!(builder.config_ref(Token).affinity.get(), Some(Cores::all())); + } +} + +#[cfg(all(test, feature = "local-batch"))] +#[cfg_attr(coverage_nightly, coverage(off))] +mod local_batch_tests { + use super::tests::TestBuilder; + use super::*; + use crate::hive::inner::config::DEFAULTS; + + #[test] + fn test_batch_config() { + let mut builder = TestBuilder::empty().batch_limit(10).weight_limit(100); + let config = builder.config_ref(Token); + assert_eq!(config.batch_limit.get(), Some(10)); + assert_eq!(config.weight_limit.get(), Some(100)); + } + + #[test] + fn test_disable_batch_config() { + let mut builder = TestBuilder::empty().with_no_local_batching(); + let config = builder.config_ref(Token); + assert_eq!(config.batch_limit.get(), None); + assert_eq!(config.weight_limit.get(), None); + } + + #[test] + fn test_default_batch_config() { + let mut builder = TestBuilder::empty() + .with_default_batch_limit() + .with_default_weight_limit(); + let config = builder.config_ref(Token); + assert_eq!(config.batch_limit.get(), DEFAULTS.lock().batch_limit.get()); + assert_eq!( + config.weight_limit.get(), + DEFAULTS.lock().weight_limit.get() + ); + } +} + +#[cfg(all(test, feature = "retry"))] +#[cfg_attr(coverage_nightly, coverage(off))] +mod retry_tests { + use super::tests::TestBuilder; + use super::*; + use crate::hive::inner::config::DEFAULTS; + use std::time::Duration; + + #[test] + fn test_retry_config() { + let mut builder = TestBuilder::empty() + .max_retries(5) + .retry_factor(Duration::from_secs(10)); + let config = builder.config_ref(Token); + assert_eq!(config.max_retries.get(), Some(5)); + assert_eq!( + config.retry_factor.get(), + Some(Duration::from_secs(10).as_nanos() as u64) + ); + } + + #[test] + fn test_disable_retry() { + let mut builder = TestBuilder::empty().with_no_retries(); + let config = builder.config_ref(Token); + assert_eq!(config.max_retries.get(), None); + assert_eq!(config.retry_factor.get(), None); + } + + #[test] + fn test_default_retry_config() { + let mut builder = TestBuilder::empty() + .with_default_max_retries() + .with_default_retry_factor(); + let config = builder.config_ref(Token); + assert_eq!(config.max_retries.get(), DEFAULTS.lock().max_retries.get()); + assert_eq!( + config.retry_factor.get(), + 
DEFAULTS.lock().retry_factor.get() + ); + } +} diff --git a/src/hive/config.rs b/src/hive/inner/config.rs similarity index 62% rename from src/hive/config.rs rename to src/hive/inner/config.rs index 43f2c6f..358076c 100644 --- a/src/hive/config.rs +++ b/src/hive/inner/config.rs @@ -1,7 +1,11 @@ -#[cfg(feature = "batching")] -pub use batching::set_batch_size_default; +#[cfg(feature = "local-batch")] +pub use self::local_batch::set_batch_limit_default; +#[cfg(feature = "local-batch")] +pub use self::local_batch::set_weight_limit_default; #[cfg(feature = "retry")] -pub use retry::{set_max_retries_default, set_retries_default_disabled, set_retry_factor_default}; +pub use self::retry::{ + set_max_retries_default, set_retries_default_disabled, set_retry_factor_default, +}; use super::Config; use parking_lot::Mutex; @@ -9,8 +13,8 @@ use std::sync::LazyLock; const DEFAULT_NUM_THREADS: usize = 4; -pub(super) static DEFAULTS: LazyLock> = LazyLock::new(|| { - let mut config = Config::default(); +pub static DEFAULTS: LazyLock> = LazyLock::new(|| { + let mut config = Config::empty(); config.set_const_defaults(); Mutex::new(config) }); @@ -35,18 +39,27 @@ pub fn reset_defaults() { impl Config { /// Creates a new `Config` with all values unset. pub fn empty() -> Self { - Self::default() - } - - /// Creates a new `Config` with default values. This simply clones `DEFAULTS`. - pub fn with_defaults() -> Self { - DEFAULTS.lock().clone() + Self { + num_threads: Default::default(), + thread_name: Default::default(), + thread_stack_size: Default::default(), + #[cfg(feature = "affinity")] + affinity: Default::default(), + #[cfg(feature = "local-batch")] + batch_limit: Default::default(), + #[cfg(feature = "local-batch")] + weight_limit: Default::default(), + #[cfg(feature = "retry")] + max_retries: Default::default(), + #[cfg(feature = "retry")] + retry_factor: Default::default(), + } } /// Resets config values to their pre-configured defaults. fn set_const_defaults(&mut self) { self.num_threads.set(Some(DEFAULT_NUM_THREADS)); - #[cfg(feature = "batching")] + #[cfg(feature = "local-batch")] self.set_batch_const_defaults(); #[cfg(feature = "retry")] self.set_retry_const_defaults(); @@ -60,12 +73,14 @@ impl Config { thread_stack_size: self.thread_stack_size.into_sync(), #[cfg(feature = "affinity")] affinity: self.affinity.into_sync(), - #[cfg(feature = "batching")] - batch_size: self.batch_size.into_sync_default(), + #[cfg(feature = "local-batch")] + batch_limit: self.batch_limit.into_sync_default(), + #[cfg(feature = "local-batch")] + weight_limit: self.weight_limit.into_sync_default(), #[cfg(feature = "retry")] - max_retries: self.max_retries.into_sync(), + max_retries: self.max_retries.into_sync_default(), #[cfg(feature = "retry")] - retry_factor: self.retry_factor.into_sync(), + retry_factor: self.retry_factor.into_sync_default(), } } @@ -78,8 +93,10 @@ impl Config { thread_stack_size: self.thread_stack_size.into_unsync(), #[cfg(feature = "affinity")] affinity: self.affinity.into_unsync(), - #[cfg(feature = "batching")] - batch_size: self.batch_size.into_unsync(), + #[cfg(feature = "local-batch")] + batch_limit: self.batch_limit.into_unsync(), + #[cfg(feature = "local-batch")] + weight_limit: self.weight_limit.into_unsync(), #[cfg(feature = "retry")] max_retries: self.max_retries.into_unsync(), #[cfg(feature = "retry")] @@ -88,7 +105,15 @@ impl Config { } } +impl Default for Config { + /// Creates a new `Config` with default values. This simply clones `DEFAULTS`. 
+ fn default() -> Self { + DEFAULTS.lock().clone() + } +} + #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] pub mod reset { /// Struct that resets the default values when `drop`ped. pub struct Reset; @@ -101,9 +126,10 @@ pub mod reset { } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { - use super::reset::Reset; use super::Config; + use super::reset::Reset; use serial_test::serial; #[test] @@ -111,35 +137,42 @@ mod tests { fn test_set_num_threads_default() { let reset = Reset; super::set_num_threads_default(2); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!(config.num_threads.get(), Some(2)); // Dropping `Reset` should reset the defaults drop(reset); let reset = Reset; super::set_num_threads_default_all(); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!(config.num_threads.get(), Some(num_cpus::get())); drop(reset); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!(config.num_threads.get(), Some(super::DEFAULT_NUM_THREADS)); } } -#[cfg(feature = "batching")] -mod batching { +#[cfg(feature = "local-batch")] +mod local_batch { use super::{Config, DEFAULTS}; - const DEFAULT_BATCH_SIZE: usize = 10; + const DEFAULT_BATCH_LIMIT: usize = 10; + + /// Sets the batch limit a `config` is configured with when using `Builder::default()`. + pub fn set_batch_limit_default(batch_limit: usize) { + DEFAULTS.lock().batch_limit.set(Some(batch_limit)); + } - pub fn set_batch_size_default(batch_size: usize) { - DEFAULTS.lock().batch_size.set(Some(batch_size)); + /// Sets the weight limit a `config` is configured with when using `Builder::default()`. + pub fn set_weight_limit_default(weight_limit: u64) { + DEFAULTS.lock().weight_limit.set(Some(weight_limit)); } impl Config { pub(super) fn set_batch_const_defaults(&mut self) { - self.batch_size.set(Some(DEFAULT_BATCH_SIZE)); + self.batch_limit.set(Some(DEFAULT_BATCH_LIMIT)); + self.weight_limit.set(None); } } } @@ -149,27 +182,40 @@ mod retry { use super::{Config, DEFAULTS}; use std::time::Duration; - const DEFAULT_MAX_RETRIES: u32 = 3; + const DEFAULT_MAX_RETRIES: u8 = 3; const DEFAULT_RETRY_FACTOR_SECS: u64 = 1; - /// Sets the max number of retries a `config` is configured with when using `Config::with_defaults()`. - pub fn set_max_retries_default(num_retries: u32) { + /// Sets the max number of retries a `config` is configured with when using `Builder::default()`. + pub fn set_max_retries_default(num_retries: u8) { DEFAULTS.lock().max_retries.set(Some(num_retries)); } - /// Sets the retry factor a `config` is configured with when using `Config::with_defaults()`. + /// Sets the retry factor a `config` is configured with when using `Builder::default()`. pub fn set_retry_factor_default(retry_factor: Duration) { DEFAULTS.lock().set_retry_factor_from(retry_factor); } - /// Specifies that retries should be disabled by default when using `Config::with_defaults()`. + /// Specifies that retries should be disabled by default when using `Builder::default()`. 
pub fn set_retries_default_disabled() { set_max_retries_default(0); } impl Config { - pub fn set_retry_factor_from(&mut self, duration: Duration) -> Option { - self.retry_factor.set(Some(duration.as_nanos() as u64)) + pub fn get_retry_factor_duration(&self) -> Option { + self.retry_factor.get().map(Duration::from_nanos) + } + + pub fn set_retry_factor_from(&mut self, duration: Duration) -> Option { + self.retry_factor + .set(Some(duration.as_nanos() as u64)) + .map(Duration::from_nanos) + } + + pub fn try_set_retry_factor_from(&self, duration: Duration) -> Option { + self.retry_factor + .try_set(duration.as_nanos() as u64) + .map(Duration::from_nanos) + .ok() } pub(super) fn set_retry_const_defaults(&mut self) { @@ -181,35 +227,30 @@ mod retry { } #[cfg(test)] + #[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::Config; - use crate::hive::config::reset::Reset; + use crate::hive::inner::config::reset::Reset; use serial_test::serial; use std::time::Duration; - impl Config { - fn get_retry_factor_duration(&self) -> Option { - self.retry_factor.get().map(Duration::from_nanos) - } - } - #[test] #[serial] fn test_set_max_retries_default() { let reset = Reset; super::set_max_retries_default(1); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!(config.max_retries.get(), Some(1)); // Dropping `Reset` should reset the defaults drop(reset); let reset = Reset; super::set_retries_default_disabled(); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!(config.max_retries.get(), Some(0)); drop(reset); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!(config.max_retries.get(), Some(super::DEFAULT_MAX_RETRIES)); } @@ -218,14 +259,14 @@ mod retry { fn test_set_retry_factor_default() { let reset = Reset; super::set_retry_factor_default(Duration::from_secs(2)); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!( config.get_retry_factor_duration(), Some(Duration::from_secs(2)) ); // Dropping `Reset` should reset the defaults drop(reset); - let config = Config::with_defaults(); + let config = Config::default(); assert_eq!( config.get_retry_factor_duration(), Some(Duration::from_secs(super::DEFAULT_RETRY_FACTOR_SECS)) diff --git a/src/hive/counter.rs b/src/hive/inner/counter.rs similarity index 98% rename from src/hive/counter.rs rename to src/hive/inner/counter.rs index bb19c11..a81147a 100644 --- a/src/hive/counter.rs +++ b/src/hive/inner/counter.rs @@ -4,8 +4,6 @@ use crate::atomic::{Atomic, AtomicInt, AtomicU64, Ordering, Orderings}; const SEQCST_ORDERING: Orderings = Orderings { load: Ordering::SeqCst, swap: Ordering::SeqCst, - fetch_update_set: Ordering::SeqCst, - fetch_update_fetch: Ordering::SeqCst, fetch_add: Ordering::SeqCst, fetch_sub: Ordering::SeqCst, }; @@ -26,8 +24,8 @@ pub enum CounterError { /// The two values may be different sizes, but their total size in bits must equal the size of the /// data type (for now fixed to `64`) used to store the value. 
/// -/// Three operations are supported: -/// * increment the left counter (`L`) +/// The following operations are supported: +/// * increment/decrement the left counter (`L`) /// * decrement the right counter (`R`) /// * transfer an amount `N` from `L` to `R` (i.e., a simultaneous decrement of `L` and /// increment of `R` by the same amount) @@ -140,6 +138,7 @@ impl Default for DualCounter { } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::*; diff --git a/src/hive/inner/diagram.mmd b/src/hive/inner/diagram.mmd new file mode 100644 index 0000000..a9da1e2 --- /dev/null +++ b/src/hive/inner/diagram.mmd @@ -0,0 +1,5 @@ +graph TD; + Generic-->Queue + Generic-->Bee + Bee-->Full + Queue-->Full \ No newline at end of file diff --git a/src/hive/gate.rs b/src/hive/inner/gate.rs similarity index 100% rename from src/hive/gate.rs rename to src/hive/inner/gate.rs diff --git a/src/hive/inner/mod.rs b/src/hive/inner/mod.rs new file mode 100644 index 0000000..a9b5946 --- /dev/null +++ b/src/hive/inner/mod.rs @@ -0,0 +1,127 @@ +//! Internal data structures needed to implement `Hive`. +mod builder; +mod config; +mod counter; +mod gate; +mod queue; +mod shared; +mod task; + +/// Prelude-like module that collects all the functions for setting global configuration defaults. +pub mod set_config { + pub use super::config::{reset_defaults, set_num_threads_default, set_num_threads_default_all}; + #[cfg(feature = "local-batch")] + pub use super::config::{set_batch_limit_default, set_weight_limit_default}; + #[cfg(feature = "retry")] + pub use super::config::{ + set_max_retries_default, set_retries_default_disabled, set_retry_factor_default, + }; +} + +// Note: it would be more appropriate for the publicly exported traits (`Builder`, `TaskQueues`) +// to be in the `beekeeper::hive` module, but they need to be in `inner` for visibility reasons. + +pub use self::builder::{Builder, BuilderConfig}; +pub use self::queue::{ChannelTaskQueues, TaskQueues, WorkerQueues, WorkstealingTaskQueues}; +pub use self::task::TaskInput; + +use self::counter::DualCounter; + use self::gate::{Gate, PhasedGate}; +use self::queue::PopTaskError; +use crate::atomic::{AtomicAny, AtomicBool, AtomicOption, AtomicUsize}; +use crate::bee::{Queen, TaskMeta, Worker}; +use crate::hive::{OutcomeQueue, OutcomeSender, SpawnError}; +use parking_lot::Mutex; +use std::thread::JoinHandle; + +type Any = AtomicOption>; +type Usize = AtomicOption; +#[cfg(feature = "retry")] +type U8 = AtomicOption; +#[cfg(any(feature = "local-batch", feature = "retry"))] +type U64 = AtomicOption; + +/// Private, zero-size struct used to call private methods in public sealed traits. +pub struct Token; + +/// Internal representation of a task to be processed by a `Hive`. +#[derive(Debug)] +pub struct Task<W: Worker> { + input: W::Input, + meta: TaskMeta, + outcome_tx: Option<OutcomeSender<W>>, +}
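// Illustrative sketch (not part of the diff): the `Shared` struct below counts queued
// and active tasks with the `DualCounter` from counter.rs. The idea is to pack both
// counts into one 64-bit atomic so a task can move from "queued" to "active" in a
// single atomic operation, without locking. The 48/16 split matches the
// `DualCounter<48>` field below; the names and the omitted overflow checks here are
// assumptions, not the crate's actual implementation.
use std::sync::atomic::{AtomicU64, Ordering};

struct PackedCounts(AtomicU64); // low 48 bits = queued (L), high 16 bits = active (R)

impl PackedCounts {
    const LEFT_MASK: u64 = (1 << 48) - 1;

    fn increment_left(&self, n: u64) {
        self.0.fetch_add(n, Ordering::SeqCst);
    }

    // moves `n` tasks from queued to active: `old - n + (n << 48)` in one step
    fn transfer(&self, n: u64) {
        self.0.fetch_add((n << 48).wrapping_sub(n), Ordering::SeqCst);
    }

    fn decrement_right(&self, n: u64) {
        self.0.fetch_sub(n << 48, Ordering::SeqCst);
    }

    fn get(&self) -> (u64, u64) {
        let v = self.0.load(Ordering::SeqCst);
        (v & Self::LEFT_MASK, v >> 48)
    }
}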
+/// Data shared by all worker threads in a `Hive`. This is the private API used by the `Hive` and +/// worker threads to enqueue, dequeue, and process tasks. +pub struct Shared<Q: Queen, T: TaskQueues<Q::Kind>> { + /// core configuration parameters + config: Config, + /// the `Queen` used to create new workers + queen: Q, + /// global and local task queues used by the `Hive` to send tasks to the worker threads + task_queues: T, + /// The results of spawning each worker + spawn_results: Mutex<Vec<Result<JoinHandle<()>, SpawnError>>>, + /// allows for 2^48 queued tasks and 2^16 active tasks + num_tasks: DualCounter<48>, + /// ID that will be assigned to the next task submitted to the `Hive` + next_task_id: AtomicUsize, + /// number of times a worker has panicked + num_panics: AtomicUsize, + /// number of `Hive` clones with a reference to this shared data + num_referrers: AtomicUsize, + /// whether the internal state of the hive is corrupted - if true, this prevents new tasks from + /// being processed (new tasks may be queued but they will never be processed); currently, this + /// can only happen if the task counter somehow gets corrupted + poisoned: AtomicBool, + /// whether the hive is suspended - if true, active tasks may complete and new tasks may be + /// queued, but new tasks will not be processed + suspended: AtomicBool, + /// gate used by worker threads to wait until the hive is resumed + resume_gate: Gate, + /// gate used by client threads to wait until all tasks have completed + join_gate: PhasedGate, + /// outcomes stored in the hive + outcomes: OutcomeQueue<Q::Kind>, +} + +/// Core configuration parameters that are set by a `Builder`, used in a `Hive`, and preserved in a +/// `Husk`. Fields are `AtomicOption`s, which enables them to be transitioned back and forth +/// between thread-safe and non-thread-safe contexts. +#[derive(Clone, Debug)] +pub struct Config { + /// Number of worker threads to spawn + num_threads: Usize, + /// Name to give each worker thread + thread_name: Any, + /// Stack size for each worker thread + thread_stack_size: Usize, + /// CPU cores to which worker threads can be pinned + #[cfg(feature = "affinity")] + affinity: Any, + /// Maximum number of tasks for a worker thread to take when receiving from the input channel + #[cfg(feature = "local-batch")] + batch_limit: Usize, + /// Maximum "weight" of tasks a worker thread may have active and pending + #[cfg(feature = "local-batch")] + weight_limit: U64, + /// Maximum number of retries for a task + #[cfg(feature = "retry")] + max_retries: U8, + /// Multiplier for the retry backoff strategy + #[cfg(feature = "retry")] + retry_factor: U64, +} + +#[cfg(test)] +pub(super) mod builder_test_utils { + use super::*; + + pub fn check_builder<B: BuilderConfig>(builder: &mut B) { + let config = builder.config_ref(Token); + assert_eq!(config.num_threads.get(), Some(4)); + assert_eq!(config.thread_name.get(), Some("foo".into())); + assert_eq!(config.thread_stack_size.get(), Some(100)); + } +} diff --git a/src/hive/inner/queue/channel.rs b/src/hive/inner/queue/channel.rs new file mode 100644 index 0000000..c9a3ae2 --- /dev/null +++ b/src/hive/inner/queue/channel.rs @@ -0,0 +1,426 @@ +//! Implementation of `TaskQueues` that uses `crossbeam` channels for the global queue (i.e., for +//! sending tasks from the `Hive` to the worker threads) and a default implementation of local +//! queues that depends on which combination of the `retry` and `local-batch` features are enabled.
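// Illustrative sketch (not part of the diff): the polling pattern the channel-backed
// global queue below relies on. A worker receives with a short timeout rather than
// blocking forever, so it can distinguish "nothing to do right now" from "the hive is
// shutting down". Only real `crossbeam_channel` APIs are used; the names are made up.
use crossbeam_channel::{Receiver, RecvTimeoutError};
use std::time::Duration;

enum Poll<T> {
    Task(T),
    Empty,
    Closed,
}

fn poll_once<T>(rx: &Receiver<T>) -> Poll<T> {
    match rx.recv_timeout(Duration::from_millis(100)) {
        Ok(task) => Poll::Task(task),
        // all senders dropped: no more tasks can ever arrive
        Err(RecvTimeoutError::Disconnected) => Poll::Closed,
        // timed out: the queue is (currently) empty, poll again later
        Err(RecvTimeoutError::Timeout) => Poll::Empty,
    }
}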
+use super::{Config, PopTaskError, Status, Task, TaskQueues, Token, WorkerQueues}; +use crate::bee::Worker; +use crossbeam_channel::RecvTimeoutError; +use crossbeam_queue::SegQueue; +use derive_more::Debug; +use parking_lot::RwLock; +use std::any; +use std::sync::Arc; +use std::time::Duration; + +// time to wait when polling the global queue +const RECV_TIMEOUT: Duration = Duration::from_millis(100); + +/// Type alias for the input task channel sender +type TaskSender<W> = crossbeam_channel::Sender<Task<W>>; +/// Type alias for the input task channel receiver +type TaskReceiver<W> = crossbeam_channel::Receiver<Task<W>>; + +/// `TaskQueues` implementation using `crossbeam` channels for the global queue. +/// +/// Worker threads may have access to local retry and/or batch queues, depending on which features +/// are enabled. +#[derive(Debug)] +#[debug("ChannelTaskQueues<{}>", any::type_name::<W>())] +pub struct ChannelTaskQueues<W: Worker> { + global: Arc<GlobalQueue<W>>, + local: RwLock<Vec<Arc<LocalQueueShared<W>>>>, +} + +impl<W: Worker> TaskQueues<W> for ChannelTaskQueues<W> { + type WorkerQueues = ChannelWorkerQueues<W>; + + fn new(_: Token) -> Self { + Self { + global: Arc::new(GlobalQueue::new()), + local: Default::default(), + } + } + + fn init_for_threads(&self, start_index: usize, end_index: usize, config: &Config) { + let mut local_queues = self.local.write(); + assert_eq!(local_queues.len(), start_index); + (start_index..end_index).for_each(|thread_index| { + local_queues.push(Arc::new(LocalQueueShared::new(thread_index, config))) + }); + } + + fn update_for_threads(&self, start_index: usize, end_index: usize, config: &Config) { + let local_queues = self.local.write(); + assert!(local_queues.len() >= end_index); + local_queues[start_index..end_index] + .iter() + .for_each(|queue| queue.update(&self.global, config)); + } + + fn try_push_global(&self, task: Task<W>) -> Result<(), Task<W>> { + self.global.try_push(task) + } + + fn worker_queues(&self, thread_index: usize) -> Self::WorkerQueues { + ChannelWorkerQueues::new(&self.global, &self.local.read()[thread_index]) + } + + fn close(&self, urgent: bool, _: Token) { + self.global.close(urgent) + } + + fn drain(self) -> Vec<Task<W>> { + if !self.global.is_closed() { + panic!("close must be called before drain"); + } + let mut tasks = Vec::new(); + let global = crate::hive::util::unwrap_arc(self.global) + .unwrap_or_else(|_| panic!("timeout waiting to take ownership of global queue")); + global.drain_into(&mut tasks); + for local in self.local.into_inner().into_iter() { + let local = crate::hive::util::unwrap_arc(local) + .unwrap_or_else(|_| panic!("timeout waiting to take ownership of local queue")); + local.drain_into(&mut tasks); + } + tasks + } +} + +pub struct GlobalQueue<W: Worker> { + global_tx: TaskSender<W>, + global_rx: TaskReceiver<W>, + status: Status, +} + +impl<W: Worker> GlobalQueue<W> { + fn new() -> Self { + let (tx, rx) = crossbeam_channel::unbounded(); + Self { + global_tx: tx, + global_rx: rx, + status: Default::default(), + } + } + + #[inline] + fn try_push(&self, task: Task<W>) -> Result<(), Task<W>> { + if !self.status.can_push() { + return Err(task); + } + self.global_tx.send(task).map_err(|err| err.into_inner()) + } + + #[inline] + fn try_pop(&self) -> Result<Task<W>, PopTaskError> { + match self.global_rx.recv_timeout(RECV_TIMEOUT) { + Ok(task) => Ok(task), + Err(RecvTimeoutError::Disconnected) => Err(PopTaskError::Closed), + Err(RecvTimeoutError::Timeout) if self.is_closed() && self.global_rx.is_empty() => { + Err(PopTaskError::Closed) + } + Err(RecvTimeoutError::Timeout) => Err(PopTaskError::Empty), + } + } + + #[inline] + fn is_closed(&self) -> bool {
self.status.is_closed() + } + + fn close(&self, urgent: bool) { + self.status.set(urgent); + } + + fn drain_into(self, tasks: &mut Vec<Task<W>>) { + tasks.reserve(self.global_rx.len()); + tasks.extend(self.global_rx.try_iter()); + } + + #[cfg(feature = "local-batch")] + fn try_iter(&self) -> impl Iterator<Item = Task<W>> + '_ { + self.global_rx.try_iter() + } +} + +pub struct ChannelWorkerQueues<W: Worker> { + global: Arc<GlobalQueue<W>>, + shared: Arc<LocalQueueShared<W>>, +} + +impl<W: Worker> ChannelWorkerQueues<W> { + fn new(global_queue: &Arc<GlobalQueue<W>>, shared: &Arc<LocalQueueShared<W>>) -> Self { + Self { + global: Arc::clone(global_queue), + shared: Arc::clone(shared), + } + } +} + +impl<W: Worker> WorkerQueues<W> for ChannelWorkerQueues<W> { + fn push(&self, task: Task<W>) { + self.shared.push(task, &self.global); + } + + fn try_pop(&self) -> Result<Task<W>, PopTaskError> { + self.shared.try_pop(&self.global) + } + + #[cfg(feature = "retry")] + fn try_push_retry(&self, task: Task<W>) -> Result<std::time::Instant, Task<W>> { + self.shared.try_push_retry(task) + } + + #[cfg(test)] + fn thread_index(&self) -> usize { + self.shared._thread_index + } +} + +/// Worker thread-specific data shared with the main thread. +struct LocalQueueShared<W: Worker> { + _thread_index: usize, + /// queue of abandoned tasks + local_abandoned: SegQueue<Task<W>>, + /// thread-local queue of tasks used when the `local-batch` feature is enabled + #[cfg(feature = "local-batch")] + local_batch: local_batch::WorkerBatchQueue<W>, + /// thread-local queues used for tasks that are waiting to be retried after a failure + #[cfg(feature = "retry")] + local_retry: super::RetryQueue<W>, +} + +impl<W: Worker> LocalQueueShared<W> { + fn new(thread_index: usize, _config: &Config) -> Self { + Self { + _thread_index: thread_index, + local_abandoned: Default::default(), + #[cfg(feature = "local-batch")] + local_batch: local_batch::WorkerBatchQueue::new( + _config.batch_limit.get_or_default(), + _config.weight_limit.get_or_default(), + ), + #[cfg(feature = "retry")] + local_retry: super::RetryQueue::new(_config.retry_factor.get_or_default()), + } + } + + /// Updates the local queues based on the provided `config`: + /// * If `local-batch` is enabled, resizes the batch queue if necessary. + /// * If `retry` is enabled, updates the retry factor.
+ fn update(&self, _global: &GlobalQueue<W>, _config: &Config) { + #[cfg(feature = "local-batch")] + self.local_batch.set_limits( + _config.batch_limit.get_or_default(), + _config.weight_limit.get_or_default(), + _global, + self, + ); + #[cfg(feature = "retry")] + self.local_retry + .set_delay_factor(_config.retry_factor.get_or_default()); + } + + #[inline] + fn push(&self, task: Task<W>, global: &GlobalQueue<W>) { + #[cfg(feature = "local-batch")] + let task = match self.local_batch.try_push(task) { + Ok(_) => return, + Err(task) => task, + }; + self.push_global(task, global); + } + + #[inline] + fn push_global(&self, task: Task<W>, global: &GlobalQueue<W>) { + let task = match global.try_push(task) { + Ok(_) => return, + Err(task) => task, + }; + self.local_abandoned.push(task); + } + + #[inline] + fn try_pop(&self, global: &GlobalQueue<W>) -> Result<Task<W>, PopTaskError> { + if !global.status.can_pop() { + return Err(PopTaskError::Closed); + } + // first try to get a previously abandoned task + if let Some(task) = self.local_abandoned.pop() { + return Ok(task); + } + // if retry is enabled, try to get a task from the retry queue + #[cfg(feature = "retry")] + if let Some(task) = self.local_retry.try_pop() { + return Ok(task); + } + // if local batching is enabled, try to get a task from the batch queue and try to refill + // it from the global queue if it's empty + #[cfg(feature = "local-batch")] + { + self.local_batch.try_pop_or_refill(global, self) + } + // fall back to requesting a task from the global queue + #[cfg(not(feature = "local-batch"))] + { + global.try_pop() + } + } + + #[cfg(feature = "retry")] + fn try_push_retry(&self, task: Task<W>) -> Result<std::time::Instant, Task<W>> { + self.local_retry.try_push(task) + } + + /// Consumes this `LocalQueueShared` and drains the tasks currently in the queues into + /// `tasks`. + fn drain_into(self, tasks: &mut Vec<Task<W>>) { + while let Some(task) = self.local_abandoned.pop() { + tasks.push(task); + } + #[cfg(feature = "local-batch")] + self.local_batch.drain_into(tasks); + #[cfg(feature = "retry")] + self.local_retry.drain_into(tasks); + } +} + +#[cfg(feature = "local-batch")] +mod local_batch { + use super::{GlobalQueue, LocalQueueShared, Task}; + use crate::atomic::{Atomic, AtomicU64, AtomicUsize}; + use crate::bee::Worker; + use crate::hive::inner::queue::PopTaskError; + use crossbeam_queue::ArrayQueue; + use parking_lot::RwLock;
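// Illustrative sketch (not part of the diff): the refill rule implemented by
// `try_pop_or_refill` below - keep pulling tasks from the global queue until either
// the batch limit is reached or the running weight reaches the weight limit (a weight
// limit of 0 means "unlimited"). The numbers in the example are hypothetical.
fn refill_count(weights: &[u64], batch_limit: usize, weight_limit: u64) -> usize {
    let mut taken = 0;
    let mut total_weight = 0u64;
    for &weight in weights {
        if taken >= batch_limit || (weight_limit > 0 && total_weight >= weight_limit) {
            break;
        }
        total_weight += weight;
        taken += 1;
    }
    taken
}

// e.g., refill_count(&[4, 4, 4, 4], 10, 10) == 3: the third task is still taken
// because the running weight (8) is below the limit when it is considered.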
+ /// Worker thread-local queue for tasks used to reduce the frequency of polling the global + /// queue (which may have a lot of contention from other worker threads). + /// + /// When the queue is empty, it attempts to refill itself from the global queue. This is + /// done considering both the size and weight limits - i.e., the local queue is filled until + /// either it is full or the total weight of queued tasks exceeds the weight limit. + /// + /// This queue is implemented internally using a crossbeam `ArrayQueue`, which has a fixed size. + /// The queue can be resized dynamically by creating a new queue and copying the tasks over. If + /// the new queue is smaller than the old one, then any excess tasks are pushed back to the + /// global queue. + pub struct WorkerBatchQueue<W: Worker> { + inner: RwLock<Option<ArrayQueue<Task<W>>>>, + batch_limit: AtomicUsize, + weight_limit: AtomicU64, + } + + impl<W: Worker> WorkerBatchQueue<W> { + pub fn new(batch_limit: usize, weight_limit: u64) -> Self { + let inner = if batch_limit > 0 { + Some(ArrayQueue::new(batch_limit)) + } else { + None + }; + Self { + inner: RwLock::new(inner), + batch_limit: AtomicUsize::new(batch_limit), + weight_limit: AtomicU64::new(weight_limit), + } + } + + pub fn set_limits( + &self, + batch_limit: usize, + weight_limit: u64, + global: &GlobalQueue<W>, + parent: &LocalQueueShared<W>, + ) { + self.weight_limit.set(weight_limit); + // acquire the exclusive lock first to prevent simultaneous updates + let mut queue = self.inner.write(); + let old_limit = self.batch_limit.set(batch_limit); + if old_limit == batch_limit { + return; + } + let old_queue = if batch_limit == 0 { + queue.take() + } else { + queue.replace(ArrayQueue::new(batch_limit)) + }; + if let Some(old_queue) = old_queue { + // try to push tasks from the old queue to the new one and fall back to pushing + // them to the global queue + old_queue + .into_iter() + .filter_map(|task| { + if let Some(new_queue) = queue.as_ref() { + new_queue.push(task).err() + } else { + Some(task) + } + }) + .for_each(|task| parent.push_global(task, global)); + } + } + + pub fn try_push(&self, task: Task<W>) -> Result<(), Task<W>> { + if let Some(queue) = self.inner.read().as_ref() { + queue.push(task) + } else { + Err(task) + } + } + + pub fn try_pop_or_refill( + &self, + global: &GlobalQueue<W>, + parent: &LocalQueueShared<W>, + ) -> Result<Task<W>, PopTaskError> { + // pop from the local queue if it has any tasks + if let Some(local) = self.inner.read().as_ref() { + if !local.is_empty() { + if let Some(task) = local.pop() { + return Ok(task); + } + } + // otherwise pull at least 1 and up to `batch_limit + 1` tasks from the input channel + let first = global.try_pop()?; + // if we succeed in getting the first task, try to refill the local queue + let batch_limit = self.batch_limit.get(); + // a batch limit of 0 means local batching is disabled + if batch_limit > 0 { + let mut iter = global.try_iter(); + let mut batch_size = 0; + let mut total_weight = first.meta.weight() as u64; + let weight_limit = self.weight_limit.get(); + // try to take up to `batch_limit` tasks from the input channel and add them + // to the local queue, but don't block if the input channel is empty; stop + // early if the weight of the queued tasks exceeds the limit + while batch_size < batch_limit + && (weight_limit == 0 || total_weight < weight_limit) + { + if let Some(task) = iter.next() { + let task_weight = task.meta.weight() as u64; + if let Err(task) = local.push(task) { + parent.local_abandoned.push(task); + break; + } + batch_size += 1; + total_weight += task_weight; + } else { + break; + } + } + } + Ok(first) + } else { + global.try_pop() + } + } + + pub fn drain_into(self, tasks: &mut Vec<Task<W>>) { + if let Some(queue) = self.inner.into_inner() { + tasks.reserve(queue.len()); + while let Some(task) = queue.pop() { + tasks.push(task); + } + } + } + } +} diff --git a/src/hive/inner/queue/mod.rs b/src/hive/inner/queue/mod.rs new file mode 100644 index 0000000..399daeb --- /dev/null +++ b/src/hive/inner/queue/mod.rs @@ -0,0 +1,94 @@ +mod channel; +#[cfg(feature = "retry")] +mod retry; +mod status; +mod workstealing; + +pub use self::channel::ChannelTaskQueues; +pub use self::workstealing::WorkstealingTaskQueues; + +#[cfg(feature = "retry")] +use self::retry::RetryQueue; +use
self::status::Status; +use super::{Config, Task, Token}; +use crate::bee::Worker; + +/// Errors that may occur when trying to pop tasks from the global queue. +#[derive(thiserror::Error, Debug)] +pub enum PopTaskError { + #[error("Global task queue is empty")] + Empty, + #[error("Global task queue is closed")] + Closed, +} + +/// Trait that encapsulates the global and local task queues used by a `Hive` for managing tasks +/// within and between worker threads. +/// +/// This trait is sealed - it cannot be implemented outside of this crate. +pub trait TaskQueues<W: Worker>: Sized + Send + Sync + 'static { + type WorkerQueues: WorkerQueues<W>; + + /// Returns a new instance. + /// + /// The private `Token` is used to prevent this method from being called externally. + fn new(token: Token) -> Self; + + /// Initializes the local queues for the given range of worker thread indices. + fn init_for_threads(&self, start_index: usize, end_index: usize, config: &Config); + + /// Updates the queue settings from `config` for the given range of worker threads. + fn update_for_threads(&self, start_index: usize, end_index: usize, config: &Config); + + /// Tries to add a task to the global queue. + /// + /// Returns an error with the task if the queue is disconnected. + fn try_push_global(&self, task: Task<W>) -> Result<(), Task<W>>; + + /// Returns a `WorkerQueues` instance for the worker thread with the given `index`. + fn worker_queues(&self, thread_index: usize) -> Self::WorkerQueues; + + /// Closes the task queues so no more tasks may be pushed. + /// + /// If `urgent` is `true`, this also prevents queued tasks from being popped. + /// + /// The private `Token` is used to prevent this method from being called externally. + fn close(&self, urgent: bool, token: Token); + + /// Consumes this `TaskQueues` and drains all tasks from the global and local queues, + /// returning them as a `Vec`. + /// + /// This method panics if `close` has not been called. + fn drain(self) -> Vec<Task<W>>; +} + +/// Trait that provides access to the task queues to each worker thread. Implementations of this +/// trait can hold thread-local types that are not Send/Sync. +pub trait WorkerQueues<W: Worker> { + /// Attempts to add a task to the local queue if space is available, otherwise adds it to the + /// global queue. If adding to the global queue fails, the task is added to a local "abandoned" + /// queue from which it may be popped or will otherwise be converted to an `Unprocessed` + /// outcome. + fn push(&self, task: Task<W>); + + /// Attempts to remove a task from the local queue for the given worker thread index. If there + /// are no local queues, or if the local queues are empty, falls back to taking a task from the + /// global queue. + /// + /// Returns an error if a task is not available, where each implementation may have a different + /// definition of "available". + /// + /// Also returns an error if the queues are closed. + fn try_pop(&self) -> Result<Task<W>, PopTaskError>; + + /// Attempts to add `task` to the local retry queue. + /// + /// Returns the earliest `Instant` at which it might be retried. If the task could not be added + /// to the retry queue (e.g., if the queue is full), the task is returned as an error. + #[cfg(feature = "retry")] + fn try_push_retry(&self, task: Task<W>) -> Result<std::time::Instant, Task<W>>; + + /// Returns the unique index of the thread that owns this `WorkerQueues` instance.
+ #[cfg(test)] + fn thread_index(&self) -> usize; +} diff --git a/src/hive/inner/queue/retry.rs b/src/hive/inner/queue/retry.rs new file mode 100644 index 0000000..bdc01fb --- /dev/null +++ b/src/hive/inner/queue/retry.rs @@ -0,0 +1,218 @@ +use crate::atomic::{Atomic, AtomicU64}; +use crate::bee::Worker; +use crate::hive::Task; +use std::cell::UnsafeCell; +use std::cmp::Ordering; +use std::collections::BinaryHeap; +use std::time::{Duration, Instant}; + +/// A task queue where each task has an associated `Instant` at which it will be available. +/// +/// This is implemented internally as an `UnsafeCell<BinaryHeap<DelayedTask<W>>>`. +/// +/// SAFETY: This data structure is designed to enable the queue to be modified (using `push` and +/// `try_pop`) by a *single thread* using interior mutability. The `drain_into` method is called +/// by a different thread, but it first takes ownership of the queue and so will never be called +/// concurrently with `push/pop`. +/// +/// `UnsafeCell` is used for performance - this is safe so long as the queue is only accessed from +/// a single thread at a time. This data structure is *not* thread-safe. +#[derive(Debug)] +pub struct RetryQueue<W: Worker> { + inner: UnsafeCell<BinaryHeap<DelayedTask<W>>>, + delay_factor: AtomicU64, +} + +impl<W: Worker> RetryQueue<W> { + /// Creates a new `RetryQueue` with the given `delay_factor` (in nanoseconds). + pub fn new(delay_factor: u64) -> Self { + Self { + inner: UnsafeCell::new(BinaryHeap::new()), + delay_factor: AtomicU64::new(delay_factor), + } + } + + /// Changes the delay factor for the queue. + pub fn set_delay_factor(&self, delay_factor: u64) { + self.delay_factor.set(delay_factor); + } + + /// Pushes an item onto the queue. Returns the `Instant` at which the task will be available, + /// or an error with `task` if there was an error pushing it. + /// + /// SAFETY: this method is only ever called within a single thread. + pub fn try_push(&self, task: Task<W>) -> Result<Instant, Task<W>> { + unsafe { + match self.inner.get().as_mut() { + Some(queue) => { + // compute the delay + let delay = 2u64 + .checked_pow(task.meta.attempt() as u32 - 1) + .and_then(|multiplier| { + self.delay_factor + .get() + .checked_mul(multiplier) + .or(Some(u64::MAX)) + .map(Duration::from_nanos) + }) + .unwrap_or_default(); + let delayed = DelayedTask::new(task, delay); + let until = delayed.until; + queue.push(delayed); + Ok(until) + } + None => Err(task), + } + } + } + + /// Returns the task at the head of the queue, if one exists and is available (i.e., its delay + /// has been exceeded), and removes it. + /// + /// SAFETY: this method is only ever called within a single thread. + pub fn try_pop(&self) -> Option<Task<W>> { + unsafe { + let queue_ptr = self.inner.get(); + if queue_ptr + .as_ref() + .and_then(|queue| queue.peek()) + .map(|head| head.until <= Instant::now()) + .unwrap_or(false) + { + queue_ptr + .as_mut() + .and_then(|queue| queue.pop()) + .map(|delayed| delayed.value) + } else { + None + } + } + } + + /// Consumes this `RetryQueue` and drains all tasks from the queue into `sink`. + pub fn drain_into(self, sink: &mut Vec<Task<W>>) { + let mut queue = self.inner.into_inner(); + sink.reserve(queue.len()); + sink.extend(queue.drain().map(|delayed| delayed.value)) + } +} + +unsafe impl<W: Worker> Sync for RetryQueue<W> {}
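// Illustrative sketch (not part of the diff): the backoff computed by `try_push`
// above is delay_factor * 2^(attempt - 1) nanoseconds. A simplified standalone
// version (edge-case handling differs slightly from the implementation above, and
// `attempt >= 1` is assumed):
use std::time::Duration;

fn retry_delay(delay_factor_nanos: u64, attempt: u8) -> Duration {
    2u64.checked_pow(u32::from(attempt) - 1)
        .and_then(|multiplier| delay_factor_nanos.checked_mul(multiplier))
        .map(Duration::from_nanos)
        .unwrap_or(Duration::from_nanos(u64::MAX))
}

// With a 1-second factor: attempt 1 -> 1s, attempt 2 -> 2s, attempt 3 -> 4s - the
// same spacing exercised by the sleeps in the tests below.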
+struct DelayedTask<W: Worker> {
+    value: Task<W>,
+    until: Instant,
+}
+
+impl<W: Worker> DelayedTask<W> {
+    pub fn new(value: Task<W>, delay: Duration) -> Self {
+        Self {
+            value,
+            until: Instant::now() + delay,
+        }
+    }
+}
+
+/// Implements ordering for `DelayedTask`, so it can be used to correctly order elements in the
+/// `BinaryHeap` of the `RetryQueue`.
+///
+/// Earlier entries have higher priority (should be popped first), so they are `Greater` than
+/// later entries.
+impl<W: Worker> Ord for DelayedTask<W> {
+    fn cmp(&self, other: &DelayedTask<W>) -> Ordering {
+        other.until.cmp(&self.until)
+    }
+}
+
+impl<W: Worker> PartialOrd for DelayedTask<W> {
+    fn partial_cmp(&self, other: &DelayedTask<W>) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<W: Worker> PartialEq for DelayedTask<W> {
+    fn eq(&self, other: &DelayedTask<W>) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl<W: Worker> Eq for DelayedTask<W> {}
+
+#[cfg(test)]
+#[cfg_attr(coverage_nightly, coverage(off))]
+mod tests {
+    use super::{RetryQueue, Task, Worker};
+    use crate::bee::stock::EchoWorker;
+    use crate::bee::{TaskId, TaskMeta};
+    use std::{thread, time::Duration};
+
+    type TestWorker = EchoWorker<usize>;
+    const DELAY: u64 = Duration::from_secs(1).as_nanos() as u64;
+
+    impl<W: Worker> RetryQueue<W> {
+        fn len(&self) -> usize {
+            unsafe { self.inner.get().as_ref().unwrap().len() }
+        }
+    }
+
+    impl<W: Worker> Task<W> {
+        /// Creates a new `Task` with the given `task_id`, `input`, and `attempt` number.
+        fn with_attempt(task_id: TaskId, input: W::Input, attempt: u8) -> Self {
+            Self {
+                input,
+                meta: TaskMeta::with_attempt(task_id, attempt),
+                outcome_tx: None,
+            }
+        }
+    }
+
+    #[test]
+    fn test_works() {
+        let queue = RetryQueue::<TestWorker>::new(DELAY);
+
+        let task1 = Task::with_attempt(1, 1, 1);
+        let task2 = Task::with_attempt(2, 2, 2);
+        let task3 = Task::with_attempt(3, 3, 3);
+
+        queue.try_push(task1.clone()).unwrap();
+        queue.try_push(task2.clone()).unwrap();
+        queue.try_push(task3.clone()).unwrap();
+
+        assert_eq!(queue.len(), 3);
+        assert_eq!(queue.try_pop(), None);
+
+        thread::sleep(Duration::from_secs(1));
+        assert_eq!(queue.try_pop(), Some(task1));
+        assert_eq!(queue.len(), 2);
+
+        thread::sleep(Duration::from_secs(1));
+        assert_eq!(queue.try_pop(), Some(task2));
+        assert_eq!(queue.len(), 1);
+
+        thread::sleep(Duration::from_secs(2));
+        assert_eq!(queue.try_pop(), Some(task3));
+        assert_eq!(queue.len(), 0);
+
+        assert_eq!(queue.try_pop(), None);
+    }
+
+    #[test]
+    fn test_into_vec() {
+        let queue = RetryQueue::<TestWorker>::new(DELAY);
+
+        let task1 = Task::with_attempt(1, 1, 1);
+        let task2 = Task::with_attempt(2, 2, 2);
+        let task3 = Task::with_attempt(3, 3, 3);
+
+        queue.try_push(task1.clone()).unwrap();
+        queue.try_push(task2.clone()).unwrap();
+        queue.try_push(task3.clone()).unwrap();
+
+        let mut v = Vec::new();
+        queue.drain_into(&mut v);
+        v.sort();
+
+        assert_eq!(v, vec![task1, task2, task3]);
+    }
+}
diff --git a/src/hive/inner/queue/status.rs b/src/hive/inner/queue/status.rs
new file mode 100644
index 0000000..4ef219e
--- /dev/null
+++ b/src/hive/inner/queue/status.rs
@@ -0,0 +1,45 @@
+use crate::atomic::{Atomic, AtomicU8};
+
+const OPEN: u8 = 0;
+const CLOSED_PUSH: u8 = 1;
+const CLOSED_POP: u8 = 2;
+
+/// Represents the status of a task queue.
+///
+/// This is a simple state machine:
+///
+/// ```text
+/// OPEN -> CLOSED_PUSH -> CLOSED_POP
+///   |_____________________^
+/// ```
+pub struct Status(AtomicU8);
+
+impl Status {
+    /// Returns `true` if the queue status is `CLOSED_PUSH` or `CLOSED_POP`.
+    pub fn is_closed(&self) -> bool {
+        self.0.get() > OPEN
+    }
+
+    /// Returns `true` if the queue can accept new tasks.
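+    /// (i.e., its status is still `OPEN`). For example, after `set(false)` the status becomes
+    /// `CLOSED_PUSH`, so `can_push` returns `false` while `can_pop` still returns `true`.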
+ pub fn can_push(&self) -> bool { + self.0.get() < CLOSED_PUSH + } + + /// Returns `true` if the queue can remove tasks. + pub fn can_pop(&self) -> bool { + self.0.get() < CLOSED_POP + } + + /// Sets the queue status to `CLOSED_PUSH` if `urgent` is `false`, or `CLOSED_POP` if `urgent` + /// is `true`. + pub fn set(&self, urgent: bool) { + // TODO: this update should be done with `fetch_max` + let new_status = if urgent { CLOSED_POP } else { CLOSED_PUSH }; + if new_status > self.0.get() { + self.0.set(new_status); + } + } +} + +impl Default for Status { + fn default() -> Self { + Self(AtomicU8::new(OPEN)) + } +} diff --git a/src/hive/inner/queue/workstealing.rs b/src/hive/inner/queue/workstealing.rs new file mode 100644 index 0000000..3ff8647 --- /dev/null +++ b/src/hive/inner/queue/workstealing.rs @@ -0,0 +1,368 @@ +//! Implementation of `TaskQueues` that uses workstealing to distribute tasks among worker threads. +//! Tasks are sent from the `Hive` via a global `Injector` queue. Each worker thread has a local +//! `Worker` queue where tasks can be pushed. If the local queue is empty, the worker thread first +//! tries to steal a task from the global queue and falls back to stealing from another worker +//! thread. If the `local-batch` feature is enabled, a worker thread will try to fill its local queue +//! up to the limit when stealing from the global queue. +use super::{Config, PopTaskError, Status, Task, TaskQueues, Token, WorkerQueues}; +use crate::atomic::{Atomic, AtomicBool}; +use crate::bee::Worker; +use crossbeam_deque::{Injector, Stealer}; +use crossbeam_queue::SegQueue; +use crossbeam_utils::Backoff; +use derive_more::Debug; +use nanorand::{Rng, tls as rand}; +use parking_lot::RwLock; +use std::any; +use std::ops::Deref; +use std::sync::Arc; + +/// `TaskQueues` implementation using workstealing. 
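+///
+/// A minimal sketch of building a `Hive` backed by this implementation; the builder method used
+/// to select the `TaskQueues` implementation is hypothetical here - see the `builder` module for
+/// the actual API for specializing an `OpenBuilder`:
+///
+/// ```ignore
+/// use beekeeper::hive::prelude::*;
+/// # type MyWorker = beekeeper::bee::stock::EchoWorker<usize>;
+/// let hive = OpenBuilder::empty()
+///     .num_threads(4)
+///     .with_worker_default::<MyWorker>()
+///     // hypothetical method for specializing to workstealing task queues
+///     .with_workstealing_queues()
+///     .build();
+/// ```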
+#[derive(Debug)]
+#[debug("WorkstealingTaskQueues<{}>", any::type_name::<W>())]
+pub struct WorkstealingTaskQueues<W: Worker> {
+    global: Arc<GlobalQueue<W>>,
+    local: RwLock<Vec<Arc<LocalQueueShared<W>>>>,
+}
+
+impl<W: Worker> TaskQueues<W> for WorkstealingTaskQueues<W> {
+    type WorkerQueues = WorkstealingWorkerQueues<W>;
+
+    fn new(_: Token) -> Self {
+        Self {
+            global: Arc::new(GlobalQueue::new()),
+            local: Default::default(),
+        }
+    }
+
+    fn init_for_threads(&self, start_index: usize, end_index: usize, config: &Config) {
+        let mut local_queues = self.local.write();
+        assert_eq!(local_queues.len(), start_index);
+        (start_index..end_index).for_each(|thread_index| {
+            local_queues.push(Arc::new(LocalQueueShared::new(thread_index, config)));
+        });
+    }
+
+    fn update_for_threads(&self, start_index: usize, end_index: usize, config: &Config) {
+        let local_queues = self.local.read();
+        assert!(local_queues.len() >= end_index);
+        (start_index..end_index).for_each(|thread_index| local_queues[thread_index].update(config));
+    }
+
+    fn worker_queues(&self, thread_index: usize) -> Self::WorkerQueues {
+        let local_queue = crossbeam_deque::Worker::new_fifo();
+        self.global.add_stealer(local_queue.stealer());
+        WorkstealingWorkerQueues::new(local_queue, &self.global, &self.local.read()[thread_index])
+    }
+
+    fn try_push_global(&self, task: Task<W>) -> Result<(), Task<W>> {
+        self.global.try_push(task)
+    }
+
+    fn close(&self, urgent: bool, _: Token) {
+        self.global.close(urgent);
+    }
+
+    fn drain(self) -> Vec<Task<W>> {
+        if !self.global.is_closed() {
+            panic!("close must be called before drain");
+        }
+        let mut tasks = Vec::new();
+        let global = crate::hive::util::unwrap_arc(self.global)
+            .unwrap_or_else(|_| panic!("timeout waiting to take ownership of global queue"));
+        global.drain_into(&mut tasks);
+        for local in self.local.into_inner().into_iter() {
+            let local = crate::hive::util::unwrap_arc(local)
+                .unwrap_or_else(|_| panic!("timeout waiting to take ownership of local queue"));
+            local.drain_into(&mut tasks);
+        }
+        tasks
+    }
+}
+
+pub struct GlobalQueue<W: Worker> {
+    queue: Injector<Task<W>>,
+    stealers: RwLock<Vec<Stealer<Task<W>>>>,
+    status: Status,
+}
+
+impl<W: Worker> GlobalQueue<W> {
+    fn new() -> Self {
+        Self {
+            queue: Injector::new(),
+            stealers: Default::default(),
+            status: Default::default(),
+        }
+    }
+
+    fn add_stealer(&self, stealer: Stealer<Task<W>>) {
+        self.stealers.write().push(stealer);
+    }
+
+    fn try_push(&self, task: Task<W>) -> Result<(), Task<W>> {
+        if !self.status.can_push() {
+            return Err(task);
+        }
+        self.queue.push(task);
+        Ok(())
+    }
+
+    /// Tries to steal a task from a random worker using its `Stealer`.
+    ///
+    /// Returns the task if one is stolen successfully, otherwise snoozes for a bit and then
+    /// returns `PopTaskError::Empty`. Returns `PopTaskError::Closed` if the queue is closed.
+    fn try_steal_from_worker_or_snooze(&self, backoff: &Backoff) -> Result<Task<W>, PopTaskError> {
+        let stealers = self.stealers.read();
+        let n = stealers.len();
+        // randomize the stealing order, to prevent always stealing from the same thread
+        std::iter::from_fn(|| Some(rand::tls_rng().generate_range(0..n)))
+            .take(n)
+            .filter_map(|i| stealers[i].steal().success())
+            .next()
+            .ok_or_else(|| {
+                if self.is_closed() && self.queue.is_empty() {
+                    PopTaskError::Closed
+                } else {
+                    // TODO: instead try the parking approach used in rust-executors, which seems
+                    // more performant under most circumstances
+                    // https://github.com/Bathtor/rust-executors/blob/master/executors/src/crossbeam_workstealing_pool.rs#L976
+                    backoff.snooze();
+                    PopTaskError::Empty
+                }
+            })
+    }
+
+    /// Tries to steal a task from the global queue, otherwise tries to steal a task from another
+    /// worker thread.
+    fn try_pop_unchecked(&self, backoff: &Backoff) -> Result<Task<W>, PopTaskError> {
+        if let Some(task) = self.queue.steal().success() {
+            Ok(task)
+        } else {
+            self.try_steal_from_worker_or_snooze(backoff)
+        }
+    }
+
+    /// Tries to steal up to `batch_limit + 1` tasks from the global queue. If at least one task
+    /// was stolen, it is popped and returned. Otherwise tries to steal a task from another worker
+    /// thread.
+    #[cfg(feature = "local-batch")]
+    fn try_refill_and_pop(
+        &self,
+        local_batch: &crossbeam_deque::Worker<Task<W>>,
+        batch_limit: usize,
+        weight_limit: u64,
+        backoff: &Backoff,
+    ) -> Result<Task<W>, PopTaskError> {
+        // if we only have a size limit but not a weight limit, use the batch-stealing function
+        // provided by `Injector`
+        if batch_limit > 0 && weight_limit == 0 {
+            if let Some(first) = self
+                .queue
+                .steal_batch_with_limit_and_pop(local_batch, batch_limit + 1)
+                .success()
+            {
+                return Ok(first);
+            }
+        }
+        // try to steal at least one from the global queue
+        if let Some(first) = self.queue.steal().success() {
+            if batch_limit > 0 && weight_limit > 0 {
+                // if batching is enabled and we have a weight limit, try to steal a batch of tasks
+                // from the global queue one at a time
+                let mut batch_size = 0;
+                let mut total_weight = first.meta.weight() as u64;
+                while let Some(task) = self.queue.steal().success() {
+                    total_weight += task.meta.weight() as u64;
+                    local_batch.push(task);
+                    if total_weight >= weight_limit {
+                        break;
+                    }
+                    batch_size += 1;
+                    if batch_size >= batch_limit {
+                        break;
+                    }
+                }
+            }
+            return Ok(first);
+        }
+        self.try_steal_from_worker_or_snooze(backoff)
+    }
+
+    fn is_closed(&self) -> bool {
+        self.status.is_closed()
+    }
+
+    fn close(&self, urgent: bool) {
+        self.status.set(urgent);
+    }
+
+    fn drain_into(self, tasks: &mut Vec<Task<W>>) {
+        while let Some(task) = self.queue.steal().success() {
+            tasks.push(task);
+        }
+        // since the `TaskQueues` instance does not retain a reference to the workers' queues
+        // (it can't, because they're not Send/Sync), the only way we have to drain them is via
+        // their stealers
+        self.stealers.into_inner().into_iter().for_each(|stealer| {
+            while let Some(task) = stealer.steal().success() {
+                tasks.push(task);
+            }
+        })
+    }
+}
+
+pub struct WorkstealingWorkerQueues<W: Worker> {
+    local: crossbeam_deque::Worker<Task<W>>,
+    global: Arc<GlobalQueue<W>>,
+    shared: Arc<LocalQueueShared<W>>,
+    backoff: Backoff,
+    snoozing: AtomicBool,
+}
+
+impl<W: Worker> WorkstealingWorkerQueues<W> {
+    fn new(
+        local: crossbeam_deque::Worker<Task<W>>,
+        global: &Arc<GlobalQueue<W>>,
+        shared: &Arc<LocalQueueShared<W>>,
+    ) -> Self {
+        Self {
+            global: Arc::clone(global),
+            local,
+            shared: Arc::clone(shared),
+            backoff: Backoff::new(),
+            snoozing: Default::default(),
+        }
+    }
+}
+
+impl<W: Worker> WorkerQueues<W> for
WorkstealingWorkerQueues<W> {
+    fn push(&self, task: Task<W>) {
+        self.local.push(task);
+    }
+
+    fn try_pop(&self) -> Result<Task<W>, PopTaskError> {
+        let result = self
+            .shared
+            .try_pop(&self.global, &self.local, &self.backoff);
+        match &result {
+            Ok(_) | Err(PopTaskError::Closed) if self.snoozing.get() => {
+                // if the worker has been snoozing and got a task, reset the backoff
+                self.backoff.reset();
+                self.snoozing.set(false);
+            }
+            Err(PopTaskError::Empty) => {
+                // if the queue was empty, the worker must have snoozed
+                self.snoozing.set(true);
+            }
+            _ => (),
+        };
+        result
+    }
+
+    #[cfg(feature = "retry")]
+    fn try_push_retry(&self, task: Task<W>) -> Result<std::time::Instant, Task<W>> {
+        self.shared.try_push_retry(task)
+    }
+
+    #[cfg(test)]
+    fn thread_index(&self) -> usize {
+        self.shared._thread_index
+    }
+}
+
+impl<W: Worker> Deref for WorkstealingWorkerQueues<W> {
+    type Target = Self;
+
+    fn deref(&self) -> &Self::Target {
+        self
+    }
+}
+
+/// Worker thread-specific data shared with the main thread.
+struct LocalQueueShared<W: Worker> {
+    _thread_index: usize,
+    /// queue of abandoned tasks
+    local_abandoned: SegQueue<Task<W>>,
+    /// limit on the number of tasks that can be queued
+    #[cfg(feature = "local-batch")]
+    batch_limit: crate::atomic::AtomicUsize,
+    /// limit on the total weight of active + queued tasks
+    #[cfg(feature = "local-batch")]
+    weight_limit: crate::atomic::AtomicU64,
+    /// thread-local queue used for tasks that are waiting to be retried after a failure
+    #[cfg(feature = "retry")]
+    local_retry: super::RetryQueue<W>,
+}
+
+impl<W: Worker> LocalQueueShared<W> {
+    fn new(thread_index: usize, _config: &Config) -> Self {
+        Self {
+            _thread_index: thread_index,
+            local_abandoned: Default::default(),
+            #[cfg(feature = "local-batch")]
+            batch_limit: crate::atomic::AtomicUsize::new(_config.batch_limit.get_or_default()),
+            #[cfg(feature = "retry")]
+            local_retry: super::RetryQueue::new(_config.retry_factor.get_or_default()),
+            #[cfg(feature = "local-batch")]
+            weight_limit: crate::atomic::AtomicU64::new(_config.weight_limit.get_or_default()),
+        }
+    }
+
+    fn update(&self, _config: &Config) {
+        #[cfg(feature = "local-batch")]
+        self.batch_limit.set(_config.batch_limit.get_or_default());
+        #[cfg(feature = "local-batch")]
+        self.weight_limit.set(_config.weight_limit.get_or_default());
+        #[cfg(feature = "retry")]
+        self.local_retry
+            .set_delay_factor(_config.retry_factor.get_or_default());
+    }
+
+    fn try_pop(
+        &self,
+        global: &GlobalQueue<W>,
+        local_batch: &crossbeam_deque::Worker<Task<W>>,
+        backoff: &Backoff,
+    ) -> Result<Task<W>, PopTaskError> {
+        if !global.status.can_pop() {
+            return Err(PopTaskError::Closed);
+        }
+        // first try to get a previously abandoned task
+        if let Some(task) = self.local_abandoned.pop() {
+            return Ok(task);
+        }
+        // if retry is enabled, try to get a task from the retry queue
+        #[cfg(feature = "retry")]
+        if let Some(task) = self.local_retry.try_pop() {
+            return Ok(task);
+        }
+        // next try the local queue
+        if let Some(task) = local_batch.pop() {
+            return Ok(task);
+        }
+        // fall back to requesting a task from the global queue - if local batching is enabled,
+        // this will also try to refill the local queue
+        #[cfg(feature = "local-batch")]
+        {
+            let batch_limit = self.batch_limit.get();
+            if batch_limit > 0 {
+                let weight_limit = self.weight_limit.get();
+                return global.try_refill_and_pop(local_batch, batch_limit, weight_limit, backoff);
+            }
+        }
+        global.try_pop_unchecked(backoff)
+    }
+
+    fn drain_into(self, tasks: &mut Vec<Task<W>>) {
+        while let Some(task) = self.local_abandoned.pop() {
+            tasks.push(task);
+        }
+        #[cfg(feature = "retry")]
+        self.local_retry.drain_into(tasks);
+    }
+
+    #[cfg(feature = "retry")]
+    fn try_push_retry(&self, task: Task<W>) -> Result<std::time::Instant, Task<W>> {
+        self.local_retry.try_push(task)
+    }
+}
diff --git a/src/hive/inner/shared.rs b/src/hive/inner/shared.rs
new file mode 100644
index 0000000..dc9728d
--- /dev/null
+++ b/src/hive/inner/shared.rs
@@ -0,0 +1,762 @@
+use super::{Config, PopTaskError, Shared, Task, TaskInput, TaskQueues, Token, WorkerQueues};
+use crate::atomic::{Atomic, AtomicInt, AtomicUsize};
+use crate::bee::{Queen, TaskId, Worker};
+use crate::channel::SenderExt;
+use crate::hive::{Husk, Outcome, OutcomeSender, SpawnError};
+use parking_lot::MutexGuard;
+use std::collections::HashMap;
+use std::ops::DerefMut;
+use std::thread::{Builder, JoinHandle};
+use std::{fmt, iter};
+
+impl<W: Worker, Q: Queen<Kind = W>, T: TaskQueues<W>> Shared<Q, T> {
+    /// Creates a new `Shared` instance with the given configuration and queen, and all other
+    /// fields set to their default values.
+    pub fn new(config: Config, queen: Q) -> Self {
+        let task_queues = T::new(Token);
+        Shared {
+            config,
+            queen,
+            task_queues,
+            spawn_results: Default::default(),
+            num_tasks: Default::default(),
+            next_task_id: Default::default(),
+            num_panics: Default::default(),
+            num_referrers: AtomicUsize::new(1),
+            poisoned: Default::default(),
+            suspended: Default::default(),
+            resume_gate: Default::default(),
+            join_gate: Default::default(),
+            outcomes: Default::default(),
+        }
+    }
+
+    /// Returns a `Builder` for creating a new thread in the `Hive`.
+    pub fn thread_builder(&self) -> Builder {
+        let mut builder = Builder::new();
+        if let Some(ref name) = self.config.thread_name.get() {
+            builder = builder.name(name.clone());
+        }
+        if let Some(ref stack_size) = self.config.thread_stack_size.get() {
+            builder = builder.stack_size(stack_size.to_owned());
+        }
+        builder
+    }
+
+    /// Returns the current number of worker threads.
+    pub fn num_threads(&self) -> usize {
+        self.config.num_threads.get_or_default()
+    }
+
+    /// Spawns the initial set of `self.config.num_threads` worker threads using the provided
+    /// spawning function. The results are stored in `self.spawn_results[0..num_threads]`. Returns
+    /// the number of worker threads that were successfully started.
+    pub fn init_threads<F>(&self, f: F) -> usize
+    where
+        F: Fn(usize) -> Result<JoinHandle<()>, SpawnError>,
+    {
+        let num_threads = self.num_threads();
+        if num_threads == 0 {
+            return 0;
+        }
+        let mut spawn_results = self.spawn_results.lock();
+        self.spawn_threads(0, num_threads, f, &mut spawn_results)
+    }
+
+    /// Increases the maximum number of threads allowed in the `Hive` by `num_threads`, and
+    /// attempts to spawn threads with indices in the range `cur_index..cur_index + num_threads`
+    /// using the provided spawning function. The results are stored in
+    /// `self.spawn_results[range]`. Returns the number of new worker threads that were
+    /// successfully started.
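+    ///
+    /// For example, if the hive currently has 4 threads, `grow_threads(2, f)` reserves thread
+    /// indices `4..6` and attempts to spawn a worker thread for each of them.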
+    pub fn grow_threads<F>(&self, num_threads: usize, f: F) -> usize
+    where
+        F: Fn(usize) -> Result<JoinHandle<()>, SpawnError>,
+    {
+        let mut spawn_results = self.spawn_results.lock();
+        let start_index = self.config.num_threads.add(num_threads).unwrap();
+        self.spawn_threads(start_index, num_threads, f, &mut spawn_results)
+    }
+
+    fn spawn_threads<F>(
+        &self,
+        start_index: usize,
+        num_threads: usize,
+        f: F,
+        spawn_results: &mut Vec<Result<JoinHandle<()>, SpawnError>>,
+    ) -> usize
+    where
+        F: Fn(usize) -> Result<JoinHandle<()>, SpawnError>,
+    {
+        assert_eq!(spawn_results.len(), start_index);
+        let end_index = start_index + num_threads;
+        // if worker threads need a local queue, initialize them before spawning
+        self.task_queues
+            .init_for_threads(start_index, end_index, &self.config);
+        // spawn the worker threads and return the results
+        let results: Vec<_> = (start_index..end_index).map(f).collect();
+        spawn_results.reserve(num_threads);
+        results
+            .into_iter()
+            .map(|result| {
+                let started = result.is_ok();
+                spawn_results.push(result);
+                started
+            })
+            .filter(|started| *started)
+            .count()
+    }
+
+    /// Attempts to spawn a thread to replace the one at the specified `index` using the provided
+    /// spawning function. The result is stored in `self.spawn_results[index]`. Returns the
+    /// spawn result for the previous thread at the same index.
+    pub fn respawn_thread<F>(&self, index: usize, f: F) -> Result<JoinHandle<()>, SpawnError>
+    where
+        F: FnOnce(usize) -> Result<JoinHandle<()>, SpawnError>,
+    {
+        let result = f(index);
+        let mut spawn_results = self.spawn_results.lock();
+        assert!(spawn_results.len() > index);
+        // Note: we do *not* want to wait on the `JoinHandle` for the previous thread as it may
+        // still be processing a task
+        std::mem::replace(&mut spawn_results[index], result)
+    }
+
+    /// Attempts to respawn any threads that are currently dead using the provided spawning
+    /// function. Returns the number of threads that were successfully respawned.
+    //#[cfg_attr(coverage(off)] // no idea how to test this
+    pub fn respawn_dead_threads<F>(&self, f: F) -> usize
+    where
+        F: Fn(usize) -> Result<JoinHandle<()>, SpawnError>,
+    {
+        self.spawn_results
+            .lock()
+            .iter_mut()
+            .enumerate()
+            .filter(|(_, result)| result.is_err())
+            .map(|(i, result)| {
+                let new_result = f(i);
+                let started = new_result.is_ok();
+                *result = new_result;
+                started
+            })
+            .filter(|started| *started)
+            .count()
+    }
+
+    /// Returns the mutex guard for the results of spawning worker threads.
+    pub fn spawn_results(&self) -> MutexGuard<'_, Vec<Result<JoinHandle<()>, SpawnError>>> {
+        self.spawn_results.lock()
+    }
+
+    /// Returns the `WorkerQueues` instance for the worker thread with the specified index.
+    pub fn worker_queues(&self, thread_index: usize) -> T::WorkerQueues {
+        self.task_queues.worker_queues(thread_index)
+    }
+
+    /// Returns a new `Worker` created by the queen.
+    pub fn create_worker(&self) -> Q::Kind {
+        self.queen.create()
+    }
+
+    /// Creates a new `Task` for the given input and outcome channel, and adds it to the global
+    /// queue.
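+    ///
+    /// With the `local-batch` feature enabled, a task whose weight exceeds the configured
+    /// `weight_limit` is immediately abandoned as an `Outcome::WeightLimitExceeded` rather than
+    /// queued; its task ID is still returned.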
+    pub fn send_one_global<I>(&self, input: I, outcome_tx: Option<&OutcomeSender<W>>) -> TaskId
+    where
+        I: Into<TaskInput<W>>,
+    {
+        if self.num_threads() == 0 {
+            dbg!("WARNING: no worker threads are active for hive");
+        }
+        let task = self.prepare_task(input, outcome_tx);
+        // when the `local-batch` feature is enabled, immediately abandon any task whose weight is
+        // greater than the configured limit
+        #[cfg(feature = "local-batch")]
+        let task = match self.abandon_if_too_heavy(task) {
+            Ok(task) => task,
+            Err(task_id) => return task_id,
+        };
+        let task_id = task.id();
+        self.push_global(task);
+        task_id
+    }
+
+    /// Creates a new `Task` for each input in the given batch and sends them to the global queue.
+    pub fn send_batch_global<B, I>(
+        &self,
+        batch: B,
+        outcome_tx: Option<&OutcomeSender<W>>,
+    ) -> Vec<TaskId>
+    where
+        I: Into<TaskInput<W>>,
+        B: IntoIterator<Item = I>,
+        B::IntoIter: ExactSizeIterator,
+    {
+        #[cfg(debug_assertions)]
+        if self.num_threads() == 0 {
+            dbg!("WARNING: no worker threads are active for hive");
+        }
+        let iter = batch.into_iter();
+        let (min_size, _) = iter.size_hint();
+        self.num_tasks
+            .increment_left(min_size as u64)
+            .expect("overflowed queued task counter");
+        let task_id_start = self.next_task_id.add(min_size);
+        let task_id_end = task_id_start + min_size;
+        let tasks = iter
+            .map(Some)
+            .chain(iter::repeat_with(|| None))
+            .zip(
+                (task_id_start..task_id_end)
+                    .map(Some)
+                    .chain(iter::repeat_with(|| None)),
+            )
+            .map_while(move |pair| match pair {
+                (Some(input), Some(task_id)) => {
+                    Some(Task::new(task_id, input.into(), outcome_tx.cloned()))
+                }
+                (Some(input), None) => Some(self.prepare_task(input, outcome_tx)),
+                (None, Some(_)) => panic!("batch contained fewer than {min_size} items"),
+                (None, None) => None,
+            });
+        if !self.is_poisoned() {
+            tasks
+                .map(|task| {
+                    let task_id = task.id();
+                    // try to send the task to the hive; if sending fails, convert the task into an
+                    // `Unprocessed` outcome and try to send it to the outcome channel; if that
+                    // fails, store the outcome in the hive
+                    if let Err(task) = self.task_queues.try_push_global(task) {
+                        self.abandon_task(task);
+                    }
+                    task_id
+                })
+                .collect()
+        } else {
+            // if the hive is poisoned, convert all tasks into `Unprocessed` outcomes and try to
+            // send them to their outcome channels or store them in the hive
+            self.abandon_batch(tasks)
+        }
+    }
+
+    /// Increments the number of queued tasks. Returns a new `Task` with the provided input and
+    /// `outcome_tx` and the next ID.
+    pub fn prepare_task<I>(&self, input: I, outcome_tx: Option<&OutcomeSender<W>>) -> Task<W>
+    where
+        I: Into<TaskInput<W>>,
+    {
+        self.num_tasks
+            .increment_left(1)
+            .expect("overflowed queued task counter");
+        let task_id = self.next_task_id.add(1);
+        Task::new(task_id, input.into(), outcome_tx.cloned())
+    }
+
+    /// Adds `task` to the global queue if possible, otherwise abandons it - converts it to an
+    /// `Unprocessed` outcome and sends it to the outcome channel or stores it in the hive.
+    pub fn push_global(&self, task: Task<W>) {
+        // try to send the task to the hive; if the hive is poisoned or if sending fails, convert
+        // the task into an `Unprocessed` outcome and try to send it to the outcome channel; if
+        // that fails, store the outcome in the hive
+        if let Some(abandoned_task) = if self.is_poisoned() {
+            Some(task)
+        } else {
+            self.task_queues.try_push_global(task).err()
+        } {
+            self.abandon_task(abandoned_task);
+        }
+    }
+
+    /// Returns the next available `Task`.
+    /// If there is a task in any local queue, it is returned; otherwise a task is requested from
+    /// the global queue.
+    ///
+    /// If the hive is suspended, the calling thread blocks until the `Hive` is resumed.
+    /// The calling thread also blocks until a task becomes available.
+    ///
+    /// Returns `None` if the hive is poisoned, or if the queues are closed and no more tasks are
+    /// available.
+    pub fn get_next_task(&self, worker_queues: &T::WorkerQueues) -> Option<Task<W>> {
+        loop {
+            // block while the hive is suspended
+            self.wait_on_resume();
+            // stop iteration if the hive is poisoned
+            if self.is_poisoned() {
+                return None;
+            }
+            // get the next task from the queue - break if it's closed
+            match worker_queues.try_pop() {
+                Ok(task) => break Some(task),
+                Err(PopTaskError::Closed) => break None,
+                Err(PopTaskError::Empty) => continue,
+            }
+        }
+        // if a task was successfully received, decrement the queued counter and increment the
+        // active counter
+        .and_then(|task| match self.num_tasks.transfer(1) {
+            Ok(_) => Some(task),
+            Err(_) => {
+                // the hive is in a corrupted state - abandon this task and then poison the hive
+                // so it can't be used anymore
+                self.abandon_task(task);
+                self.poison();
+                None
+            }
+        })
+    }
+
+    pub fn abandon_task(&self, task: Task<W>) {
+        let (outcome, outcome_tx) = task.into_unprocessed();
+        self.send_or_store_outcome(outcome, outcome_tx);
+        // decrement the queued counter since it was incremented but the task was never queued
+        let _ = self.num_tasks.decrement_left(1);
+        self.no_work_notify_all();
+    }
+
+    /// Converts each `Task` in the iterator into `Outcome::Unprocessed` and attempts to send it
+    /// to its `OutcomeSender` if there is one, or stores it if there is no sender or the send
+    /// fails. Returns a vector of the task IDs of the tasks.
+    pub fn abandon_batch<I>(&self, tasks: I) -> Vec<TaskId>
+    where
+        I: Iterator<Item = Task<W>>,
+    {
+        // don't unlock outcomes unless we have to
+        let mut outcomes = Option::None;
+        let task_ids: Vec<_> = tasks
+            .map(|task| {
+                let task_id = task.id();
+                let (outcome, outcome_tx) = task.into_unprocessed();
+                if let Some(outcome) = if let Some(tx) = outcome_tx {
+                    tx.try_send_msg(outcome)
+                } else {
+                    Some(outcome)
+                } {
+                    outcomes
+                        .get_or_insert_with(|| self.outcomes.get_mut())
+                        .insert(task_id, outcome);
+                }
+                task_id
+            })
+            .collect();
+        // decrement the queued counter since it was incremented but the tasks were never queued
+        let _ = self.num_tasks.decrement_left(task_ids.len() as u64);
+        self.no_work_notify_all();
+        task_ids
+    }
+
+    #[cfg(feature = "local-batch")]
+    pub fn abandon_if_too_heavy(&self, task: Task<W>) -> Result<Task<W>, TaskId> {
+        let weight_limit = self.config.weight_limit.get().unwrap_or(0);
+        if weight_limit > 0 && task.meta().weight() as u64 > weight_limit {
+            let task_id = task.id();
+            let (outcome, outcome_tx) = task.into_overweight();
+            self.send_or_store_outcome(outcome, outcome_tx);
+            // decrement the queued counter since it was incremented but the task was never queued
+            let _ = self.num_tasks.decrement_left(1);
+            self.no_work_notify_all();
+            Err(task_id)
+        } else {
+            Ok(task)
+        }
+    }
+
+    /// Sends an outcome to `outcome_tx`, or stores it in the `Hive` shared data if there is no
+    /// sender, or if the send fails.
+    pub fn send_or_store_outcome(&self, outcome: Outcome<W>, outcome_tx: Option<OutcomeSender<W>>) {
+        if let Some(outcome) = if let Some(tx) = outcome_tx {
+            tx.try_send_msg(outcome)
+        } else {
+            Some(outcome)
+        } {
+            self.add_outcome(outcome)
+        }
+    }
+
+    /// Called by a worker thread after completing a task.
+    /// Notifies any thread that has `join`ed the `Hive` if there is no more work to be done.
+    #[inline]
+    pub fn finish_task(&self, panicking: bool) {
+        self.num_tasks
+            .decrement_right(1)
+            .expect("active task counter was smaller than expected");
+        if panicking {
+            self.num_panics.add(1);
+        }
+        self.no_work_notify_all();
+    }
+
+    /// Returns a reference to the `Queen`.
+    ///
+    /// Note that, if the queen is a `QueenMut`, the returned value will be a `QueenCell`, and it
+    /// is necessary to call its `get()` method to obtain a reference to the inner queen.
+    pub fn queen(&self) -> &Q {
+        &self.queen
+    }
+
+    /// Returns a tuple with the number of (queued, active) tasks.
+    #[inline]
+    pub fn num_tasks(&self) -> (u64, u64) {
+        self.num_tasks.get()
+    }
+
+    /// Returns `true` if the hive has not been poisoned and there are either active tasks, or
+    /// there are queued tasks and the suspended flag hasn't been set.
+    #[inline]
+    pub fn has_work(&self) -> bool {
+        !self.is_poisoned() && {
+            let (queued, active) = self.num_tasks();
+            active > 0 || (!self.is_suspended() && queued > 0)
+        }
+    }
+
+    /// Blocks the current thread until all active tasks have been processed. Also waits until all
+    /// queued tasks have been processed unless the suspended flag has been set.
+    pub fn wait_on_done(&self) {
+        self.join_gate.wait_while(|| self.has_work());
+    }
+
+    /// Notify all observers joining this hive when all tasks have been completed.
+    pub fn no_work_notify_all(&self) {
+        if !self.has_work() {
+            self.join_gate.notify_all();
+        }
+    }
+
+    pub fn num_panics(&self) -> usize {
+        self.num_panics.get()
+    }
+
+    /// Returns the number of `Hive`s holding a reference to this shared data.
+    pub fn num_referrers(&self) -> usize {
+        self.num_referrers.get()
+    }
+
+    /// Increments the number of referrers and returns the previous value.
+    pub fn referrer_is_cloning(&self) -> usize {
+        self.num_referrers.add(1)
+    }
+
+    /// Decrements the number of referrers and returns the previous value.
+    pub fn referrer_is_dropping(&self) -> usize {
+        self.num_referrers.sub(1)
+    }
+
+    /// Performs the following actions:
+    /// 1. Sets the `poisoned` flag to `true`
+    /// 2. Closes all task queues so no more tasks may be pushed
+    /// 3. Resumes the hive if it is suspended, which enables blocked worker threads to terminate.
+    pub fn poison(&self) {
+        self.poisoned.set(true);
+        self.close_task_queues(true);
+        self.set_suspended(false);
+    }
+
+    /// Returns `true` if the hive has been poisoned. A poisoned hive may accept new tasks but
+    /// will never process them. Unprocessed tasks can be retrieved by calling `take_outcomes` or
+    /// `try_into_husk`.
+    #[inline]
+    pub fn is_poisoned(&self) -> bool {
+        self.poisoned.get()
+    }
+
+    /// Sets the `suspended` flag. If `true`, worker threads may terminate early, and no new tasks
+    /// will be started until this flag is set to `false`. Returns `true` if the value was changed.
+    pub fn set_suspended(&self, suspended: bool) -> bool {
+        if self.suspended.set(suspended) == suspended {
+            false
+        } else {
+            if !suspended {
+                self.resume_gate.notify_all();
+            }
+            true
+        }
+    }
+
+    /// Returns `true` if the `suspended` flag has been set.
+    #[inline]
+    pub fn is_suspended(&self) -> bool {
+        self.suspended.get()
+    }
+
+    #[inline]
+    pub fn wait_on_resume(&self) {
+        self.resume_gate.wait_while(|| self.is_suspended());
+    }
+
+    /// Returns a mutable reference to the retained task outcomes.
+    pub fn outcomes(&self) -> impl DerefMut<Target = HashMap<TaskId, Outcome<W>>> + '_ {
+        self.outcomes.get_mut()
+    }
+
+    /// Adds a new outcome to the retained task outcomes.
+    pub fn add_outcome(&self, outcome: Outcome<W>) {
+        self.outcomes.push(outcome);
+    }
+
+    /// Removes and returns all retained `Unprocessed` outcomes.
+    pub fn take_unprocessed(&self) -> Vec<Outcome<W>> {
+        let mut outcomes = self.outcomes.get_mut();
+        let unprocessed_task_ids: Vec<_> = outcomes
+            .keys()
+            .cloned()
+            .filter(|task_id| matches!(outcomes.get(task_id), Some(Outcome::Unprocessed { .. })))
+            .collect();
+        unprocessed_task_ids
+            .into_iter()
+            .map(|task_id| outcomes.remove(&task_id).unwrap())
+            .collect()
+    }
+
+    /// Closes the task queues so no more tasks can be added.
+    pub fn close_task_queues(&self, urgent: bool) {
+        self.task_queues.close(urgent, Token);
+    }
+
+    fn flush(
+        task_queues: T,
+        mut outcomes: HashMap<TaskId, Outcome<W>>,
+    ) -> HashMap<TaskId, Outcome<W>> {
+        for task in task_queues.drain().into_iter() {
+            let task_id = task.id();
+            let (outcome, outcome_tx) = task.into_unprocessed();
+            if let Some(outcome) = if let Some(tx) = outcome_tx {
+                tx.try_send_msg(outcome)
+            } else {
+                Some(outcome)
+            } {
+                outcomes.insert(task_id, outcome);
+            }
+        }
+        outcomes
+    }
+
+    /// Consumes this `Shared`, closes and drains the task queues, converts any queued tasks into
+    /// `Outcome::Unprocessed` outcomes, and tries to send them or (if the task does not have a
+    /// sender, or if the send fails) stores them in the `outcomes` map. Returns the outcome map.
+    pub fn into_outcomes(self) -> HashMap<TaskId, Outcome<W>> {
+        Self::flush(self.task_queues, self.outcomes.into_inner())
+    }
+
+    /// Consumes this `Shared` and returns a `Husk` containing the `Queen`, panic count, stored
+    /// outcomes, and all configuration information necessary to create a new `Hive`. Any queued
+    /// tasks are converted into `Outcome::Unprocessed` outcomes and either sent to the task's
+    /// sender or (if there is no sender, or the send fails) stored in the `outcomes` map.
+    pub fn into_husk(self) -> Husk<Q> {
+        Husk::new(
+            self.config.into_unsync(),
+            self.queen,
+            self.num_panics.into_inner(),
+            Self::flush(self.task_queues, self.outcomes.into_inner()),
+        )
+    }
+}
+
+impl<W, Q, T> fmt::Debug for Shared<Q, T>
+where
+    W: Worker,
+    Q: Queen<Kind = W>,
+    T: TaskQueues<W>,
+{
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let (queued, active) = self.num_tasks();
+        f.debug_struct("Shared")
+            .field("name", &self.config.thread_name)
+            .field("num_threads", &self.config.num_threads)
+            .field("num_tasks_queued", &queued)
+            .field("num_tasks_active", &active)
+            .finish()
+    }
+}
+
+#[cfg(any(feature = "local-batch", feature = "retry"))]
+mod update_config {
+    use super::Shared;
+    use crate::atomic::{Atomic, AtomicOption};
+    use crate::bee::{Queen, Worker};
+    use crate::hive::TaskQueues;
+    use std::fmt::Debug;
+
+    impl<W, Q, T> Shared<Q, T>
+    where
+        W: Worker,
+        Q: Queen<Kind = W>,
+        T: TaskQueues<W>,
+    {
+        fn maybe_update<P, A>(&self, new_value: P, option: &AtomicOption<P, A>) -> P
+        where
+            P: Eq + Copy + Clone + Debug + Default,
+            A: Atomic<P>,
+        {
+            let prev_value = option.try_set(new_value).unwrap_or_default();
+            if prev_value == new_value {
+                return prev_value;
+            }
+            let num_threads = self.num_threads();
+            if num_threads == 0 {
+                return prev_value;
+            }
+            self.task_queues
+                .update_for_threads(0, num_threads, &self.config);
+            prev_value
+        }
+
+        /// Changes the local queue batch limit. This requires allocating a new queue for each
+        /// worker thread.
+        ///
+        /// Note: this method will block the current thread waiting for all local queues to become
+        /// writable; if `batch_limit` is less than the current batch size, this method will also
+        /// block while any thread's queue length is > `batch_limit` before moving the elements.
+        #[cfg(feature = "local-batch")]
+        pub fn set_worker_batch_limit(&self, batch_limit: usize) -> usize {
+            self.maybe_update(batch_limit, &self.config.batch_limit)
+        }
+
+        /// Changes the local queue batch weight limit.
+        #[cfg(feature = "local-batch")]
+        pub fn set_worker_weight_limit(&self, weight_limit: u64) -> u64 {
+            self.maybe_update(weight_limit, &self.config.weight_limit)
+        }
+
+        /// Sets the worker retry limit and returns the previous value.
+        #[cfg(feature = "retry")]
+        pub fn set_worker_retry_limit(&self, max_retries: u8) -> u8 {
+            self.maybe_update(max_retries, &self.config.max_retries)
+        }
+
+        /// Sets the worker retry factor and returns the previous value.
+        #[cfg(feature = "retry")]
+        pub fn set_worker_retry_factor(
+            &self,
+            duration: std::time::Duration,
+        ) -> std::time::Duration {
+            std::time::Duration::from_nanos(
+                self.maybe_update(duration.as_nanos() as u64, &self.config.retry_factor),
+            )
+        }
+    }
+}
+
+#[cfg(feature = "affinity")]
+mod affinity {
+    use super::{Shared, TaskQueues};
+    use crate::bee::{Queen, Worker};
+    use crate::hive::cores::{Core, Cores};
+
+    impl<W, Q, T> Shared<Q, T>
+    where
+        W: Worker,
+        Q: Queen<Kind = W>,
+        T: TaskQueues<W>,
+    {
+        /// Adds cores to which worker threads may be pinned.
+        pub fn add_core_affinity(&self, new_cores: Cores) {
+            let _ = self.config.affinity.try_update_with(|mut affinity| {
+                let updated = affinity.union(&new_cores) > 0;
+                updated.then_some(affinity)
+            });
+        }
+
+        /// Returns the `Core` to which the specified worker thread may be pinned, if any.
+        pub fn get_core_affinity(&self, thread_index: usize) -> Option<Core> {
+            self.config
+                .affinity
+                .get()
+                .and_then(|cores| cores.get(thread_index))
+        }
+    }
+}
+
+#[cfg(feature = "local-batch")]
+mod local_batch {
+    use crate::bee::{Queen, Worker};
+    use crate::hive::inner::{Shared, TaskQueues};
+
+    impl<W, Q, T> Shared<Q, T>
+    where
+        W: Worker,
+        Q: Queen<Kind = W>,
+        T: TaskQueues<W>,
+    {
+        /// Returns the local queue batch limit.
+        pub fn worker_batch_limit(&self) -> usize {
+            self.config.batch_limit.get().unwrap_or_default()
+        }
+
+        /// Returns the local queue batch weight limit. A value of `0` means there is no weight
+        /// limit.
+        pub fn worker_weight_limit(&self) -> u64 {
+            self.config.weight_limit.get().unwrap_or_default()
+        }
+    }
+}
+
+#[cfg(feature = "retry")]
+mod retry {
+    use crate::bee::{Queen, TaskMeta, Worker};
+    use crate::hive::inner::{Shared, Task, TaskQueues};
+    use crate::hive::{OutcomeSender, WorkerQueues};
+    use std::time::Instant;
+
+    impl<W, Q, T> Shared<Q, T>
+    where
+        W: Worker,
+        Q: Queen<Kind = W>,
+        T: TaskQueues<W>,
+    {
+        /// Returns the current worker retry limit.
+        pub fn worker_retry_limit(&self) -> u8 {
+            self.config.max_retries.get().unwrap_or_default()
+        }
+
+        /// Returns the current worker retry factor.
+        pub fn worker_retry_factor(&self) -> std::time::Duration {
+            std::time::Duration::from_nanos(self.config.retry_factor.get().unwrap_or_default())
+        }
+
+        /// Returns `true` if the hive is configured to retry tasks and the `attempt` field of the
+        /// given `task_meta` is less than the maximum number of retries.
+        pub fn can_retry(&self, task_meta: &TaskMeta) -> bool {
+            self.config
+                .max_retries
+                .get()
+                .map(|max_retries| task_meta.attempt() < max_retries)
+                .unwrap_or(false)
+        }
+
+        /// Creates a retry task from the given `input`, `meta`, and `outcome_tx` (incrementing
+        /// the attempt number) and adds it to the local retry queue of the given `worker_queues`.
+        pub fn try_send_retry(
+            &self,
+            input: W::Input,
+            meta: TaskMeta,
+            outcome_tx: Option<&OutcomeSender<W>>,
+            worker_queues: &T::WorkerQueues,
+        ) -> Result<Instant, Task<W>> {
+            self.num_tasks
+                .increment_left(1)
+                .expect("overflowed queued task counter");
+            let task = Task::next_retry_attempt(input, meta, outcome_tx.cloned());
+            worker_queues.try_push_retry(task)
+        }
+    }
+}
+
+#[cfg(test)]
+#[cfg_attr(coverage_nightly, coverage(off))]
+mod tests {
+    use crate::bee::DefaultQueen;
+    use crate::bee::stock::ThunkWorker;
+    use crate::hive::ChannelTaskQueues;
+
+    type VoidThunkWorker = ThunkWorker<()>;
+    type VoidThunkWorkerShared = super::Shared<
+        DefaultQueen<VoidThunkWorker>,
+        ChannelTaskQueues<VoidThunkWorker>,
+    >;
+
+    #[test]
+    fn test_sync_shared() {
+        fn assert_sync<T: Sync>() {}
+        assert_sync::<VoidThunkWorkerShared>();
+    }
+
+    #[test]
+    fn test_send_shared() {
+        fn assert_send<T: Send>() {}
+        assert_send::<VoidThunkWorkerShared>();
+    }
+}
diff --git a/src/hive/inner/task.rs b/src/hive/inner/task.rs
new file mode 100644
index 0000000..68219bc
--- /dev/null
+++ b/src/hive/inner/task.rs
@@ -0,0 +1,141 @@
+use super::Task;
+use crate::bee::{TaskId, TaskMeta, Worker};
+use crate::hive::{Outcome, OutcomeSender};
+
+/// The type of input to a task for a given `Worker` type. This changes depending on the features
+/// that are enabled.
+pub use task_impl::TaskInput;
+
+impl<W: Worker> Task<W> {
+    /// Returns the ID of the task.
+    #[inline]
+    pub fn id(&self) -> TaskId {
+        self.meta.id()
+    }
+
+    /// Returns a reference to the task metadata.
+    #[inline]
+    pub fn meta(&self) -> &TaskMeta {
+        &self.meta
+    }
+
+    /// Consumes this `Task` and returns its input, metadata, and outcome sender.
+    pub fn into_parts(self) -> (W::Input, TaskMeta, Option<OutcomeSender<W>>) {
+        (self.input, self.meta, self.outcome_tx)
+    }
+
+    /// Consumes this `Task` and returns an `Outcome::Unprocessed` outcome with the input and ID,
+    /// and the outcome sender.
+    pub fn into_unprocessed(self) -> (Outcome<W>, Option<OutcomeSender<W>>) {
+        let outcome = Outcome::Unprocessed {
+            input: self.input,
+            task_id: self.meta.id(),
+        };
+        (outcome, self.outcome_tx)
+    }
+
+    /// Creates a new `Task` with the given metadata, and increments the attempt number.
+    #[cfg(feature = "retry")]
+    pub fn next_retry_attempt(
+        input: W::Input,
+        mut meta: TaskMeta,
+        outcome_tx: Option<OutcomeSender<W>>,
+    ) -> Self {
+        meta.inc_attempt();
+        Self {
+            input,
+            meta,
+            outcome_tx,
+        }
+    }
+}
+
+#[cfg(not(feature = "local-batch"))]
+mod task_impl {
+    use super::Task;
+    use crate::bee::{TaskId, TaskMeta, Worker};
+    use crate::hive::OutcomeSender;
+
+    pub type TaskInput<W> = <W as Worker>::Input;
+
+    impl<W: Worker> Task<W> {
+        /// Creates a new `Task` with the given `task_id`.
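+        ///
+        /// Without the `local-batch` feature, the input is used as-is and the task metadata
+        /// carries no weight.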
+        pub fn new(
+            task_id: TaskId,
+            input: TaskInput<W>,
+            outcome_tx: Option<OutcomeSender<W>>,
+        ) -> Self {
+            Task {
+                input,
+                meta: TaskMeta::new(task_id),
+                outcome_tx,
+            }
+        }
+    }
+}
+
+#[cfg(feature = "local-batch")]
+mod task_impl {
+    use super::Task;
+    use crate::bee::{TaskId, TaskMeta, Worker};
+    use crate::hive::{Outcome, OutcomeSender, Weighted};
+
+    pub type TaskInput<W> = Weighted<<W as Worker>::Input>;
+
+    impl<W: Worker> Task<W> {
+        /// Creates a new `Task` with the given `task_id`.
+        pub fn new(
+            task_id: TaskId,
+            input: TaskInput<W>,
+            outcome_tx: Option<OutcomeSender<W>>,
+        ) -> Self {
+            let (input, weight) = input.into_parts();
+            Task {
+                input,
+                meta: TaskMeta::with_weight(task_id, weight),
+                outcome_tx,
+            }
+        }
+
+        /// Consumes this `Task` and returns an `Outcome::WeightLimitExceeded` outcome with the
+        /// input, weight, and ID, and the outcome sender.
+        pub fn into_overweight(self) -> (Outcome<W>, Option<OutcomeSender<W>>) {
+            let outcome = Outcome::WeightLimitExceeded {
+                input: self.input,
+                weight: self.meta.weight(),
+                task_id: self.meta.id(),
+            };
+            (outcome, self.outcome_tx)
+        }
+    }
+}
+
+impl<W: Worker<Input: Clone>> Clone for Task<W> {
+    fn clone(&self) -> Self {
+        Self {
+            input: self.input.clone(),
+            meta: self.meta.clone(),
+            outcome_tx: self.outcome_tx.clone(),
+        }
+    }
+}
+
+impl<W: Worker> PartialEq for Task<W> {
+    fn eq(&self, other: &Self) -> bool {
+        self.meta.id() == other.meta.id()
+    }
+}
+
+impl<W: Worker> Eq for Task<W> {}
+
+impl<W: Worker> PartialOrd for Task<W> {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<W: Worker> Ord for Task<W> {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.meta.id().cmp(&other.meta.id())
+    }
+}
diff --git a/src/hive/mock.rs b/src/hive/mock.rs
new file mode 100644
index 0000000..010a97b
--- /dev/null
+++ b/src/hive/mock.rs
@@ -0,0 +1,121 @@
+//! Utilities for testing `Worker`s.
+use super::{Outcome, Task, TaskInput};
+use crate::bee::{Context, LocalContext, TaskId, Worker};
+use std::cell::RefCell;
+
+/// A struct used for testing `Worker`s in a mock environment without needing to create a `Hive`.
+#[derive(Debug)]
+pub struct MockTaskRunner<W: Worker> {
+    worker: RefCell<W>,
+    task_id: RefCell<TaskId>,
+}
+
+impl<W: Worker> MockTaskRunner<W> {
+    /// Creates a new `MockTaskRunner` with the given `worker` and starting task ID.
+    pub fn new(worker: W, first_task_id: TaskId) -> Self {
+        Self {
+            worker: RefCell::new(worker),
+            task_id: RefCell::new(first_task_id),
+        }
+    }
+
+    /// Applies this runner's `Worker` to the given `input`.
+    ///
+    /// The task ID is automatically incremented and used to create the `Context`.
+    ///
+    /// Returns the `Outcome` from executing the task.
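+    ///
+    /// A minimal illustrative example, assuming the stock `EchoWorker` (which simply echoes its
+    /// input):
+    ///
+    /// ```
+    /// use beekeeper::bee::stock::EchoWorker;
+    /// use beekeeper::hive::mock::MockTaskRunner;
+    /// use beekeeper::hive::Outcome;
+    ///
+    /// let runner = MockTaskRunner::<EchoWorker<usize>>::default();
+    /// let outcome = runner.apply(42usize);
+    /// // `EchoWorker` returns its input unchanged, so the outcome is a `Success`
+    /// assert!(matches!(outcome, Outcome::Success { value: 42, .. }));
+    /// ```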
+    pub fn apply<I: Into<TaskInput<W>>>(&self, input: I) -> Outcome<W> {
+        let task_id = self.next_task_id();
+        let local = MockLocalContext(self);
+        let task: Task<W> = Task::new(task_id, input.into(), None);
+        let (input, task_meta, _) = task.into_parts();
+        let ctx = Context::new(task_meta, Some(&local));
+        let result = self.worker.borrow_mut().apply(input, &ctx);
+        let (task_meta, subtask_ids) = ctx.into_parts();
+        Outcome::from_worker_result(result, task_meta, subtask_ids)
+    }
+
+    fn next_task_id(&self) -> TaskId {
+        let mut task_id_counter = self.task_id.borrow_mut();
+        let task_id = *task_id_counter;
+        *task_id_counter += 1;
+        task_id
+    }
+}
+
+impl<W: Worker> From<W> for MockTaskRunner<W> {
+    fn from(value: W) -> Self {
+        Self::new(value, 0)
+    }
+}
+
+impl<W: Worker + Default> Default for MockTaskRunner<W> {
+    fn default() -> Self {
+        Self::from(W::default())
+    }
+}
+
+#[derive(Debug)]
+struct MockLocalContext<'a, W: Worker>(&'a MockTaskRunner<W>);
+
+impl<W, I> LocalContext<I> for MockLocalContext<'_, W>
+where
+    W: Worker,
+    I: Into<TaskInput<W>>,
+{
+    fn should_cancel_tasks(&self) -> bool {
+        false
+    }
+
+    fn submit_task(&self, _: I) -> TaskId {
+        self.0.next_task_id()
+    }
+
+    #[cfg(test)]
+    fn thread_index(&self) -> usize {
+        0
+    }
+}
+
+#[cfg(test)]
+#[cfg_attr(coverage_nightly, coverage(off))]
+mod tests {
+    use std::vec;
+
+    use super::MockTaskRunner;
+    use crate::bee::{Context, Worker, WorkerResult};
+    use crate::hive::Outcome;
+
+    #[derive(Debug, Default)]
+    struct TestWorker;
+
+    impl Worker for TestWorker {
+        type Input = usize;
+        type Output = usize;
+        type Error = ();
+
+        fn apply(&mut self, input: Self::Input, ctx: &Context<Self::Input>) -> WorkerResult<Self> {
+            if !ctx.is_cancelled() {
+                for i in 1..=3 {
+                    ctx.submit(input + i).unwrap();
+                }
+            }
+            Ok(input)
+        }
+    }
+
+    #[test]
+    fn test_works() {
+        let runner = MockTaskRunner::<TestWorker>::default();
+        let outcome = runner.apply(42usize);
+        assert!(matches!(
+            outcome,
+            Outcome::SuccessWithSubtasks {
+                value: 42,
+                task_id: 0,
+                ..
+            }
+        ));
+        assert_eq!(outcome.subtask_ids(), Some(&vec![1, 2, 3]))
+    }
+}
diff --git a/src/hive/mod.rs b/src/hive/mod.rs
index 19d26df..0a1bde3 100644
--- a/src/hive/mod.rs
+++ b/src/hive/mod.rs
@@ -1,13 +1,15 @@
 //! A worker pool implementation.
 //!
-//! A [`Hive`](crate::hive::Hive) has a pool of worker threads that it uses to execute tasks. 
+//! A [`Hive`](crate::hive::Hive) has a pool of worker threads that it uses to execute tasks.
 //!
-//! The `Hive` has a [`Queen`] of type `Q`, which it uses to create a [`Worker`] of type `W` for
-//! each thread it starts in the pool.
+//! The `Hive` has a [`Queen`](crate::bee::Queen) of type `Q`, which it uses to create a
+//! [`Worker`] of type `Queen::Kind` for each thread it starts in the pool. The `Hive` also has a
+//! `TaskQueues` implementation of type `T`, which provides the global and worker thread-local
+//! queues for managing tasks.
 //!
 //! Each task is submitted to the `Hive` as an input of type `W::Input`, and, optionally, a
 //! channel where the [`Outcome`] of processing the task will be sent upon completion. To these,
-//! the `Hive` adds additional context to create the task. It then adds the task to an internal
+//! the `Hive` adds additional context to create the task. It then adds the task to its global
 //! queue that is shared with all the worker threads.
 //!
 //! Each worker thread executes a loop in which it receives a task, evaluates it with its `Worker`,
@@ -21,35 +23,36 @@
 //! # Creating a `Hive`
 //!
 //! The typical way to create a `Hive` is using a [`Builder`]. Use
-//! [`Builder::new()`](crate::hive::builder::Builder::new) to create an empty (completely
-//! unconfigured) `Builder`, or [`Builder::default()`](crate::hive::builder::Builder::default) to
+//! [`OpenBuilder::empty()`](crate::hive::OpenBuilder::empty) to create an empty (completely
+//! unconfigured) `Builder`, or [`OpenBuilder::default()`](crate::hive::OpenBuilder::default) to
 //! create a `Builder` configured with the global default values (see below).
 //!
-//! See the [`Builder`] documentation for more details on the options that may be configured, and
-//! the `build*` methods available to create the `Hive`.
+//! See the [`builder` module documentation](crate::hive::builder) for more details on the options
+//! that may be configured.
 //!
 //! Building a `Hive` consumes the `Builder`. To create multiple identical `Hive`s, you can `clone`
 //! the `Builder`.
 //!
 //! ```
-//! use beekeeper::hive::Builder;
+//! use beekeeper::hive::prelude::*;
 //! # type MyWorker1 = beekeeper::bee::stock::EchoWorker<usize>;
 //! # type MyWorker2 = beekeeper::bee::stock::EchoWorker<usize>;
 //!
-//! let builder1 = Builder::default();
+//! let builder1 = channel_builder(true);
 //! let builder2 = builder1.clone();
 //!
-//! let hive1 = builder1.build_with_default::<MyWorker1>();
-//! let hive2 = builder2.build_with_default::<MyWorker2>();
+//! let hive1 = builder1.with_worker_default::<MyWorker1>().build();
+//! let hive2 = builder2.with_worker_default::<MyWorker2>().build();
 //! ```
 //!
 //! If you want a `Hive` with the global defaults for a `Worker` type that implements `Default`,
-//! you can call [`Hive::default`](crate::hive::Hive::default) rather than use a `Builder`.
+//! you can call [`DefaultHive::<W>::default`](crate::hive::Hive::default) rather than use a
+//! `Builder`.
 //!
 //! ```
-//! # use beekeeper::hive::Hive;
+//! # use beekeeper::hive::DefaultHive;
 //! # type MyWorker = beekeeper::bee::stock::EchoWorker<usize>;
-//! let hive: Hive<MyWorker> = Hive::default();
+//! let hive = DefaultHive::<MyWorker>::default();
 //! ```
 //!
 //! ## Thread affinity (requires `feature = "affinity"`)
@@ -70,30 +73,29 @@
 //! terms of its index, which is a value in the range `0..n`, where `n` is the number of available
 //! CPU cores. Internally, a mapping is maintained between the index and the OS-specific core ID.
 //!
-//! The [`Builder::core_affinity`](crate::hive::builder::Builder::core_affinity) method accepts a
-//! range of core indices that are reserved as *available* for the `Hive` to use for thread-pinning,
-//! but they may or may not actually be used (depending on the number of worker threads and core
-//! availability). The number of available cores can be smaller or larger than the number of
-//! threads. Any thread that is spawned for which there is no corresponding core index is simply
-//! started with no core affinity.
+//! The [`Builder::core_affinity`] method accepts a range of core indices that are reserved as
+//! *available* for the `Hive` to use for thread-pinning, but they may or may not actually be used
+//! (depending on the number of worker threads and core availability). The number of available
+//! cores can be smaller or larger than the number of threads. Any thread that is spawned for which
+//! there is no corresponding core index is simply started with no core affinity.
 //!
 //! ```
-//! use beekeeper::hive::Builder;
+//! use beekeeper::hive::prelude::*;
 //! # type MyWorker = beekeeper::bee::stock::EchoWorker<usize>;
 //!
-//! let hive = Builder::new()
+//! let hive = channel_builder(false)
 //!     .num_threads(4)
 //!     // 16 cores will be available for pinning but only 4 will be used initially
 //!     .core_affinity(0..16)
-//!     .build_with_default::<MyWorker>();
+//!     .with_worker_default::<MyWorker>()
+//!     .build();
 //!
 //! // increase the number of threads by 12 - the new threads will use the additional
 //! // 12 available cores for pinning
 //! hive.grow(12);
 //!
 //! // increase the number of threads and also provide additional cores for pinning
-//! // this requires the `affinity` feature
-//! // hive.grow_with_affinity(4, 16..20);
+//! hive.grow_with_affinity(4, 16..20);
 //! ```
 //!
 //! As an application developer depending on `beekeeper`, you must ensure you assign each core
@@ -105,7 +107,7 @@
 //!
 //! ## Retrying tasks (requires `feature = "retry"`)
 //!
-//! Some types of tasks (e.g., those requirng network I/O operations) may fail transiently but
+//! Some types of tasks (e.g., those requiring network I/O operations) may fail transiently but
 //! could be successful if retried at a later time. Such retry behavior is supported by the `retry`
 //! feature and only requires a) configuring the `Builder` by setting
 //! [`max_retries`](crate::hive::Builder::max_retries) and (optionally)
@@ -113,7 +115,7 @@
 //! to return [`ApplyError::Retryable`](crate::bee::ApplyError::Retryable) for transient failures.
 //!
 //! When a `Retryable` error occurs, the following steps happen:
-//! * The `attempt` number in the task's [`Context`] is incremented.
+//! * The `attempt` number in the task's [`Context`](crate::bee::Context) is incremented.
 //! * If the `attempt` number exceeds `max_retries`, the error is converted to
 //!   `Outcome::MaxRetriesAttempted` and sent/stored.
 //! * Otherwise, the task is added to the `Hive`'s retry queue, with a delay of
 //!   `2^(attempt - 1) * retry_factor`.
 //! * If a `retry_factor` is not configured, then the task is queued with no delay.
 //! * When a worker thread becomes available, it first checks the retry queue to see if there is
-//!   a task to retry before taking a new task from the input channel.
+//!   a task to retry before taking a new task from the global queue.
 //!
-//! Note that `ApplyError::Retryable` is not feature-gated - a `Worker` can be implemented to be
-//! retry-aware but used with a `Hive` for which retry is not enabled, or in an application where
-//! the `retry` feature is not enabled. In such cases, `Retryable` errors are automatically
-//! converted to `Fatal` errors by the worker thread.
+//! Note that `ApplyError::Retryable` and `Context::attempt` are not feature-gated - a `Worker` can
+//! be implemented to be retry-aware but used with a `Hive` for which retry is not enabled, or in
+//! an application where the `retry` feature is not enabled. In such cases, `Retryable` errors are
+//! automatically converted to `Fatal` errors by the worker thread.
 //!
-//! ## Batching tasks (requires `feature = "batching"`)
+//! ## Batching tasks (requires `feature = "local-batch"`)
 //!
 //! The performance of a `Hive` can degrade as the number of worker threads grows and/or the
 //! average duration of a task shrinks, due to increased contention between worker threads when
-//! receiving tasks from the shared input channel. To improve performance, workers can take more
+//! receiving tasks from the shared global queue. To improve performance, workers can take more
 //! than one task each time they access the input channel, and store the extra tasks in a local
-//! queue. This behavior is activated by enabling the `batching` feature.
+//! queue. This behavior is activated by enabling the `local-batch` feature.
 //!
-//! With the `batching` feature enabled, `Builder` gains the
-//! [`batch_size`](crate::hive::Builder::batch_size) method for configuring size of worker threads'
-//! local queues, and `Hive` gains the [`set_worker_batch_size`](crate::hive::Hive::set_batch_size)
-//! method for changing the batch size of an existing `Hive`.
+//! With the `local-batch` feature enabled, `Builder` gains the
+//! [`batch_limit`](crate::hive::Builder::batch_limit) method for configuring the size of worker
+//! threads' local queues, and `Hive` gains the
+//! [`set_worker_batch_limit`](crate::hive::Hive::set_worker_batch_limit) method for changing the
+//! batch size of an existing `Hive`.
+//!
+//! ### Task weighting
+//!
+//! With the `local-batch` feature enabled, it also becomes possible to assign a weight to each
+//! input. This is useful for workloads where some tasks may take substantially longer to process
+//! than others. Combined with setting a weight limit (using [`Builder::weight_limit`]
+//! or [`Hive::set_worker_weight_limit`](crate::hive::Hive::set_worker_weight_limit)), this limits
+//! the number of tasks that can be queued by a worker thread based on the minimum of the batch
+//! size and the total task weight.
+//!
+//! A weighted input is an instance of [`Weighted`], where `T` is the worker's input type.
+//! Instances of `Weighted` can be created explicitly, or you can convert input values using
+//! the methods on `Weighted` or iterators over input values using the methods on the
+//! [`WeightedIteratorExt`] extension trait.
+//!
+//! The `Hive` methods for submitting tasks accept both weighted and unweighted input, but weighted
+//! inputs are *not* supported with the `local-batch` feature disabled.
+//!
+//! ```
+//! use beekeeper::hive::prelude::*;
+//! # type MyWorker = beekeeper::bee::stock::EchoWorker<usize>;
+//!
+//! let hive = channel_builder(false)
+//!     .num_threads(4)
+//!     .batch_limit(10)
+//!     .weight_limit(10)
+//!     .with_worker_default::<MyWorker>()
+//!     .build();
+//!
+//! // creates weighted inputs, where each input's weight is the same
+//! // as its value, e.g. `((0,0), (1,1),..,(9,9))`
+//! let inputs = (0..10).into_iter().into_identity_weighted();
+//! let outputs = hive.map(inputs).into_outputs();
+//! ```
 //!
 //! ## Global defaults
 //!
 //! The [`hive`](crate::hive) module has functions for setting the global default values for some
 //! of the `Builder` parameters. These default values are used to pre-configure the `Builder` when
-//! using `Builder::default()`.
+//! using [`OpenBuilder::default()`].
 //!
 //! The available global defaults are:
 //!
 //! * `num_threads`
 //!   * [`set_num_threads_default`]: sets the default to a specific value
 //!   * [`set_num_threads_default_all`]: sets the default to all available CPU cores
-//! * [`batch_size`](crate::hive::set_batch_size_default) (requires `feature = "batching"`)
-//! * [`max_retries`](crate::hive::set_max_retries_default) (requires `feature = "retry"`)
-//! * [`retry_factor`](crate::hive::set_retry_factor_default) (requires `feature = "retry"`)
+//! * [`batch_limit`](crate::hive::set_batch_limit_default) (requires `feature = "local-batch"`)
+//! * [`weight_limit`](crate::hive::set_weight_limit_default) (requires `feature = "local-batch"`)
+//! * [`max_retries`](crate::hive::set_max_retries_default) (requires `feature = "retry"`)
+//! * [`retry_factor`](crate::hive::set_retry_factor_default) (requires `feature = "retry"`)
 //!
 //!
@@ -162,9 +200,10 @@
 //!
 //! A `Hive` is simply a wrapper around a data structure that is shared between the `Hive`, its
 //! worker threads, and any clones that have been made of the `Hive`. In other words, cloning a
-//! `Hive` simply creates another reference to the same shared data (similar to cloning an [`Arc`]).
-//! The worker threads and the shared data structure are dropped automatically when the last `Hive`
-//! referring to them is dropped (see "Disposing of a Hive" below).
+//! `Hive` simply creates another reference to the same shared data (similar to cloning an
+//! [`Arc`](std::sync::Arc)). The worker threads and the shared data structure are dropped
+//! automatically when the last `Hive` referring to them is dropped (see "Disposing of a Hive"
+//! below).
 //!
 //! # Submitting tasks
 //!
@@ -181,7 +220,8 @@
 //!
 //! Each group of functions has multiple variants:
 //! * The methods that end with `_send` all take a channel sender as a second argument and will
-//!   deliver results to that channel as they become available.
+//!   deliver results to that channel as they become available. See the note below on proper use of
+//!   channels.
 //! * The methods that end with `_store` are all non-blocking functions that return the task IDs
 //!   associated with the submitted tasks and will store the task results in the hive. The outcomes
 //!   can be retrieved from the hive later by their IDs, e.g., using `remove_success`.
@@ -210,10 +250,34 @@
 //! You can create an instance of the enabled outcome channel type using the [`outcome_channel`]
 //! function.
 //!
+//! `Hive` has several methods (with the `_send` suffix) for submitting tasks whose outcomes will be
+//! delivered to a user-specified channel. Note that, for these methods, the `tx` parameter is of
+//! type `Borrow<OutcomeSender<W>>`, which allows you to pass in either a value or a reference.
+//! Passing a value causes the `Sender` to be dropped after the call; passing a reference allows
+//! you to use the same `Sender` for multiple `_send` calls, but you need to explicitly drop the
+//! sender (e.g., `drop(tx)`), pass it by value to the last `_send` call, or be careful about how
+//! you obtain outcomes from the `Receiver`. Methods such as `recv` and `iter` will block until the
+//! `Sender` is dropped. Since `Receiver` implements `Iterator`, you can use the methods of
+//! [`OutcomeIteratorExt`] to iterate over the outcomes for specific task IDs.
+//!
+//! ```rust,ignore
+//! use beekeeper::hive::prelude::*;
+//! let (tx, rx) = outcome_channel();
+//! let hive = ...
+//! let task_ids = hive.map_send(0..10, tx);
+//! rx.select_unordered_outputs(task_ids).for_each(|output| ...);
+//! ```
+//!
+//! You should *not* pass clones of the `Sender` to `_send` methods as this results in slightly
+//! worse performance and still has the requirement that you manually drop the original `Sender`
+//! value.
+//!
 //! # Retrieving outcomes
 //!
 //! Each task that is successfully submitted to a `Hive` will have a corresponding `Outcome`.
-//! [`Outcome`] is similar to `Result`, except that the error variants are enumerated:
+//! [`Outcome`] is similar to `Result`, except that the error variants are enumerated.
+//! * [`Success`](Outcome::Success): the task completed successfully. The output is provided in
+//!   the `value` field.
 //! * [`Failure`](Outcome::Failure): the task failed with an error of type `W::Error`. If possible,
 //!   the input value is also provided.
 //!
 //! * [`Panic`](Outcome::Panic): the `Worker` panicked while processing the task. The panic
@@ -226,6 +290,12 @@
 //!   ID was found. This variant is only used when a list of outcomes is requested, such as when
 //!   using one of the `select_*` methods on an `Outcome` iterator (see below).
 //!
+//! Two additional `Outcome` variants depend on optional feature flags:
+//! * [`WeightLimitExceeded`](Outcome::WeightLimitExceeded): depends on the `local-batch` feature;
+//!   the task's weight exceeded the limit set for the `Hive`.
+//! * [`MaxRetriesAttempted`](Outcome::MaxRetriesAttempted): depends on the `retry` feature; the
+//!   task failed after being retried the maximum number of times.
+//!
 //! An `Outcome` can be converted into a `Result` (using `into()`) or
 //! [`unwrap`](crate::hive::Outcome::unwrap)ped into an output value of type `W::Output`.
 //!
@@ -237,7 +307,7 @@
 //! methods create a dedicated outcome channel to use for each batch of tasks, and thus expect the
 //! channel receiver to receive exactly the outcomes with the task IDs of the submitted tasks. If,
 //! somehow, an unexpected `Outcome` is received, it is silently dropped. If any expected outcomes
-//! have not been received after the channel sender has disconnected, then those task IDs are'
+//! have not been received after the channel sender has disconnected, then those task IDs are
 //! yielded as `Outcome::Missing` results.
 //!
 //! When the [`OutcomeIteratorExt`] trait is in scope, then additional methods become available on
@@ -250,7 +320,7 @@
 //!
 //! ## Outcome channels
 //!
-//! Using one of the `*_send` methods with a channel enables the `Hive` to send you `Outcome`s
+//! Using one of the `*_send` methods with a channel enables the `Hive` to send `Outcome`s
 //! asynchronously as they become available. This means that you will likely receive the outcomes
 //! out of order (i.e., not in the same order as the provided inputs).
 //!
@@ -266,13 +336,13 @@
 //! task IDs.
 //!
 //! ```
-//! use beekeeper::hive::{Hive, OutcomeIteratorExt, outcome_channel};
+//! use beekeeper::hive::{DefaultHive, OutcomeIteratorExt, outcome_channel};
 //! # type MyWorker = beekeeper::bee::stock::EchoWorker;
 //!
-//! let hive: Hive = Hive::default();
+//! let hive = DefaultHive::<MyWorker>::default();
 //! let (tx, rx) = outcome_channel::<MyWorker>();
-//! let batch1 = hive.swarm_send(0..10, tx.clone());
-//! let batch2 = hive.swarm_send(10..20, tx.clone());
+//! let batch1 = hive.swarm_send(0..10, &tx);
+//! let batch2 = hive.swarm_send(10..20, tx);
 //! let outputs: Vec<_> = rx.into_iter()
 //!     .select_ordered_outputs(batch1.into_iter().chain(batch2.into_iter()))
 //!     .collect();
@@ -287,10 +357,10 @@
 //! (see below), which provides a common interface for accessing stored `Outcome`s.
 //!
 //! ```
-//! use beekeeper::hive::{Hive, OutcomeStore};
+//! use beekeeper::hive::{DefaultHive, OutcomeStore};
 //! # type MyWorker = beekeeper::bee::stock::EchoWorker;
 //!
-//! let hive: Hive = Hive::default();
+//! let hive = DefaultHive::<MyWorker>::default();
 //! let (outcomes, sum) = hive.scan(0..10, 0, |sum, i| {
 //!     *sum += i;
 //!     i * 2
@@ -314,20 +384,19 @@
 //! outcomes.
 //!
 //! Processing can be resumed by calling the [`resume`](crate::hive::Hive::resume) method.
-//! Alternatively, the [`resume_send`](crate::hive::Hive::resume_send) or
-//! [`resume_store`](crate::hive::Hive::resume_store) method can be used to both resume and
-//! submit any unprocessed tasks stored in the `Hive` for (re)processing.
+//! The [`swarm_unprocessed_send`](crate::hive::Hive::swarm_unprocessed_send) or
+//! [`swarm_unprocessed_store`](crate::hive::Hive::swarm_unprocessed_store) methods can be used to
+//! submit any unprocessed tasks stored in the `Hive` for (re)processing after resuming the `Hive`.
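+//!
+//! For example (a minimal sketch, assuming `hive` has queued and/or cancelled tasks):
+//!
+//! ```rust,ignore
+//! // pause processing; interrupted tasks become `Unprocessed` outcomes stored in the hive
+//! hive.suspend();
+//! // ...
+//! // resume processing and requeue the unprocessed tasks, storing their new outcomes
+//! hive.resume();
+//! hive.swarm_unprocessed_store();
+//! ```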
 //!
 //! ## Hive poisoning
 //!
 //! The internal data structure shared between a `Hive`, its clones, and its worker threads is
 //! considered thread-safe. However, there is no formal proof that it is incorruptible. A `Hive`
 //! attempts to detect if it has become corrupted and, if so, sets the `poisoned` flag on the
-//! shared data. A poisoned `Hive` will not accept or process any new tasks, and all worker threads
-//! will terminate after finishing their current tasks. If a task is submitted to a poisoned `Hive`,
-//! it will immediately be converted to an `Unprocessed` outcome and sent/stored. The only thing
-//! that can be done with a poisoned `Hive` is to access its stored `Outcome`s or convert it to a
-//! `Husk` (see below).
+//! shared data. A poisoned `Hive` will not accept or process any new tasks, all worker threads
+//! will terminate after finishing their current tasks, and all queued tasks are converted to
+//! `Unprocessed` outcomes and sent/stored. The only thing that can be done with a poisoned `Hive`
+//! is to access its stored `Outcome`s or convert it to a `Husk` (see below).
 //!
 //! # Disposing of a `Hive`
 //!
@@ -336,7 +405,7 @@
 //! its shared data is dropped, then the following steps happen:
 //! * The `Hive` is poisoned to prevent any new tasks from being submitted or queued tasks from
 //!   being processed.
-//! * All of the `Hive`s queued tasks are coverted to `Unprocessed` outcomes and either sent to
+//! * All of the `Hive`'s queued tasks are converted to `Unprocessed` outcomes and either sent to
 //!   their outcome channel or stored in the `Hive`.
 //! * If the `Hive` was in a suspended state, it is resumed. This is necessary to unblock the
 //!   worker threads and allow them to terminate.
@@ -355,179 +424,89 @@
 //! The `Husk` can be used to create a new `Builder`
 //! ([`Husk::as_builder`](crate::hive::husk::Husk::as_builder)) or a new `Hive`
 //! ([`Husk::into_hive`](crate::hive::husk::Husk::into_hive)).
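+//!
+//! For example (a minimal sketch; `hive` is a hypothetical `Hive` whose tasks have completed,
+//! and the `TaskQueues` type passed to `into_hive` is an assumption):
+//!
+//! ```rust,ignore
+//! // shut down the hive, letting queued tasks finish (`urgent == false`),
+//! // and recover its state
+//! let husk = hive.try_into_husk(false).unwrap();
+//! let num_successes = husk.iter_successes().count();
+//! // create a new hive with the same configuration and queen
+//! let hive2 = husk.into_hive::<ChannelTaskQueues<_>>();
+//! ```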
-mod builder; -mod config; -mod counter; -mod gate; +pub mod builder; +mod context; +#[cfg(feature = "affinity")] +pub mod cores; #[allow(clippy::module_inception)] mod hive; mod husk; +mod inner; +pub mod mock; mod outcome; -// TODO: scoped hive is still a WIP -//mod scoped; -mod shared; -mod task; -//mod workstealing; - -#[cfg(feature = "affinity")] -pub mod cores; -#[cfg(feature = "retry")] -mod delay; - -pub use self::builder::Builder; -#[cfg(feature = "batching")] -pub use self::config::set_batch_size_default; -pub use self::config::{reset_defaults, set_num_threads_default, set_num_threads_default_all}; -#[cfg(feature = "retry")] -pub use self::config::{ - set_max_retries_default, set_retries_default_disabled, set_retry_factor_default, +mod sentinel; +mod util; +#[cfg(feature = "local-batch")] +mod weighted; + +pub use self::builder::{BeeBuilder, ChannelBuilder, FullBuilder, OpenBuilder, TaskQueuesBuilder}; +pub use self::builder::{ + channel as channel_builder, open as open_builder, workstealing as workstealing_builder, }; -pub use self::hive::Poisoned; +#[cfg(feature = "affinity")] +pub use self::cores::{Core, Cores}; +pub use self::hive::{DefaultHive, Hive, Poisoned}; pub use self::husk::Husk; +pub use self::inner::{ + Builder, ChannelTaskQueues, TaskInput, TaskQueues, WorkstealingTaskQueues, set_config::*, +}; pub use self::outcome::{Outcome, OutcomeBatch, OutcomeIteratorExt, OutcomeStore}; +#[cfg(feature = "local-batch")] +pub use self::weighted::{Weighted, WeightedIteratorExt}; + +use self::context::HiveLocalContext; +use self::inner::{Config, Shared, Task, WorkerQueues}; +use self::outcome::{DerefOutcomes, OutcomeQueue, OwnedOutcomes}; +use self::sentinel::Sentinel; +use crate::bee::Worker; +use crate::channel::{Receiver, Sender, channel}; +use std::io::Error as SpawnError; /// Sender type for channel used to send task outcomes. -pub type OutcomeSender = crate::channel::Sender>; +pub type OutcomeSender = Sender>; /// Receiver type for channel used to receive task outcomes. -pub type OutcomeReceiver = crate::channel::Receiver>; +pub type OutcomeReceiver = Receiver>; /// Creates a channel (`Sender`, `Receiver`) pair for sending task outcomes from the `Hive` to the /// task submitter. #[inline] pub fn outcome_channel() -> (OutcomeSender, OutcomeReceiver) { - crate::channel::channel() + channel() } pub mod prelude { pub use super::{ - outcome_channel, Builder, Hive, Husk, Outcome, OutcomeBatch, OutcomeIteratorExt, - OutcomeStore, Poisoned, + Builder, Hive, Husk, Outcome, OutcomeBatch, OutcomeIteratorExt, OutcomeStore, Poisoned, + TaskQueuesBuilder, channel_builder, open_builder, outcome_channel, workstealing_builder, }; -} - -use self::counter::DualCounter; -use self::gate::{Gate, PhasedGate}; -use self::outcome::{DerefOutcomes, OwnedOutcomes}; -use crate::atomic::{AtomicAny, AtomicBool, AtomicOption, AtomicUsize}; -use crate::bee::{Context, Queen, TaskId, Worker}; -use parking_lot::Mutex; -use std::collections::HashMap; -use std::io::Error as SpawnError; -use std::sync::Arc; -use std::thread::JoinHandle; - -type Any = AtomicOption>; -type Usize = AtomicOption; -#[cfg(feature = "retry")] -type U32 = AtomicOption; -#[cfg(feature = "retry")] -type U64 = AtomicOption; - -/// A pool of worker threads that each execute the same function. -/// -/// See the [module documentation](crate::hive) for details. -pub struct Hive>(Option>); - -/// A `Hive`'s inner state. 
Wraps a) the `Hive`'s reference to the `Shared` data (which is shared -/// with the worker threads) and b) the `Sender>`, which is the sending end of the channel -/// used to send tasks to the worker threads. -struct HiveInner> { - task_tx: TaskSender, - shared: Arc>, -} - -type TaskSender = std::sync::mpsc::Sender>; -type TaskReceiver = std::sync::mpsc::Receiver>; - -/// Internal representation of a task to be processed by a `Hive`. -struct Task { - input: W::Input, - ctx: Context, - outcome_tx: Option>, -} - -/// Core configuration parameters that are set by a `Builder`, used in a `Hive`, and preserved in a -/// `Husk`. Fields are `AtomicOption`s, which enables them to be transitioned back and forth -/// between thread-safe and non-thread-safe contexts. -#[derive(Clone, Debug, Default)] -struct Config { - /// Number of worker threads to spawn - num_threads: Usize, - /// Name to give each worker thread - thread_name: Any, - /// Stack size for each worker thread - thread_stack_size: Usize, - /// CPU cores to which worker threads can be pinned - #[cfg(feature = "affinity")] - affinity: Any, - /// Maximum number of tasks for a worker thread to - /// take when receiving tasks from the input channel - #[cfg(feature = "batching")] - batch_size: Usize, - /// Maximum number of retries for a task - #[cfg(feature = "retry")] - max_retries: U32, - /// Multiplier for the retry backoff strategy - #[cfg(feature = "retry")] - retry_factor: U64, -} - -/// Data shared by all worker threads in a `Hive`. -struct Shared> { - /// core configuration parameters - config: Config, - /// the `Queen` used to create new workers - queen: Mutex, - /// receiver for the channel used by the `Hive` to send tasks to the worker threads - task_rx: Mutex>, - /// The results of spawning each worker - spawn_results: Mutex, SpawnError>>>, - /// allows for 2^48 queued tasks and 2^16 active tasks - num_tasks: DualCounter<48>, - /// ID that will be assigned to the next task submitted to the `Hive` - next_task_id: AtomicUsize, - /// number of times a worker has panicked - num_panics: AtomicUsize, - /// number of `Hive` clones with a reference to this shared data - num_referrers: AtomicUsize, - /// whether the internal state of the hive is corrupted - if true, this prevents new tasks from - /// processed (new tasks may be queued but they will never be processed); currently, this can - /// only happen if the task counter somehow get corrupted - poisoned: AtomicBool, - /// whether the hive is suspended - if true, active tasks may complete and new tasks may be - /// queued, but new tasks will not be processed - suspended: Arc, - /// gate used by worker threads to wait until the hive is resumed - resume_gate: Gate, - /// gate used by client threads to wait until all tasks have completed - join_gate: PhasedGate, - /// outcomes stored in the hive - outcomes: Mutex>>, - /// worker thread-specific queues of tasks used when the `batching` feature is enabled - #[cfg(feature = "batching")] - local_queues: parking_lot::RwLock>>>, - /// queue used for tasks that are waiting to be retried after a failure - #[cfg(feature = "retry")] - retry_queues: parking_lot::RwLock>>>, + #[cfg(feature = "local-batch")] + pub use super::{Weighted, WeightedIteratorExt}; } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { - use super::{Builder, Hive, Outcome, OutcomeIteratorExt, OutcomeStore}; + use super::inner::TaskQueues; + use super::{ + Builder, ChannelTaskQueues, Hive, Outcome, OutcomeIteratorExt, OutcomeStore, + TaskQueuesBuilder, 
WorkstealingTaskQueues, channel_builder, workstealing_builder, + }; use crate::barrier::IndexedBarrier; use crate::bee::stock::{Caller, OnceCaller, RefCaller, Thunk, ThunkWorker}; use crate::bee::{ - ApplyError, ApplyRefError, Context, DefaultQueen, Queen, RefWorker, RefWorkerResult, + ApplyError, ApplyRefError, Context, DefaultQueen, QueenMut, RefWorker, RefWorkerResult, TaskId, Worker, WorkerResult, }; use crate::channel::{Message, ReceiverExt}; use crate::hive::outcome::DerefOutcomes; + use rstest::*; use std::fmt::Debug; use std::io::{self, BufRead, BufReader, Write}; use std::process::{Child, ChildStdin, ChildStdout, Command, ExitStatus, Stdio}; use std::sync::{ + Arc, Barrier, atomic::{AtomicUsize, Ordering}, - mpsc, Arc, Barrier, + mpsc, }; use std::thread; use std::time::Duration; @@ -537,42 +516,68 @@ mod tests { const SHORT_TASK: Duration = Duration::from_secs(2); const LONG_TASK: Duration = Duration::from_secs(5); - type ThunkHive = Hive, DefaultQueen>>; + type TWrk = ThunkWorker; /// Convenience function that returns a `Hive` configured with the global defaults, and the /// specified number of workers that execute `Thunk`s, i.e. closures that return `T`. - pub fn thunk_hive(num_threads: usize) -> ThunkHive { - Builder::default() + pub fn thunk_hive(num_threads: usize, builder: B) -> Hive>, T> + where + I: Send + Sync + Debug + 'static, + T: TaskQueues>, + B: TaskQueuesBuilder> = T>, + { + builder .num_threads(num_threads) - .build_with_default() + .with_queen_default() + .build() } - #[test] - fn test_works() { - let hive = thunk_hive(TEST_TASKS); + pub fn void_thunk_hive(num_threads: usize, builder: B) -> Hive>, T> + where + T: TaskQueues>, + B: TaskQueuesBuilder> = T>, + { + thunk_hive(num_threads, builder) + } + + #[rstest] + fn test_works(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive(TEST_TASKS, builder_factory(true)); let (tx, rx) = mpsc::channel(); assert_eq!(hive.max_workers(), TEST_TASKS); assert_eq!(hive.alive_workers(), TEST_TASKS); assert!(!hive.has_dead_workers()); for _ in 0..TEST_TASKS { let tx = tx.clone(); - hive.apply_store(Thunk::of(move || { + hive.apply_store(Thunk::from(move || { tx.send(1).unwrap(); })); } assert_eq!(rx.iter().take(TEST_TASKS).sum::(), TEST_TASKS); } - #[test] - fn test_grow_from_zero() { - let hive = thunk_hive::(0); + #[rstest] + fn test_grow_from_zero( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(0, builder_factory(true)); // check that with 0 threads no tasks are scheduled let (tx, rx) = super::outcome_channel(); - let _ = hive.apply_send(Thunk::of(|| 0), tx); + let _ = hive.apply_send(Thunk::from(|| 0), &tx); thread::sleep(ONE_SEC); assert_eq!(hive.num_tasks().0, 1); assert!(matches!(rx.try_recv_msg(), Message::ChannelEmpty)); - hive.grow(1).expect("error spawning threads"); + assert!(matches!(hive.grow(0), Ok(0))); + thread::sleep(ONE_SEC); + assert_eq!(hive.num_tasks().0, 1); + assert!(matches!(hive.grow(1), Ok(1))); thread::sleep(ONE_SEC); assert_eq!(hive.num_tasks().0, 0); assert!(matches!( @@ -581,12 +586,17 @@ mod tests { )); } - #[test] - fn test_grow() { - let hive: ThunkHive<()> = Builder::new().num_threads(TEST_TASKS).build_with_default(); + #[rstest] + fn test_grow_from_nonzero( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let 
hive = void_thunk_hive(TEST_TASKS, builder_factory(false)); // queue some long-running tasks for _ in 0..TEST_TASKS { - hive.apply_store(Thunk::of(|| thread::sleep(LONG_TASK))); + hive.apply_store(Thunk::from(|| thread::sleep(LONG_TASK))); } thread::sleep(ONE_SEC); assert_eq!(hive.num_tasks().1, TEST_TASKS as u64); @@ -596,21 +606,46 @@ mod tests { hive.grow(new_threads).expect("error spawning threads"); // queue some more long-running tasks for _ in 0..new_threads { - hive.apply_store(Thunk::of(|| thread::sleep(LONG_TASK))); + hive.apply_store(Thunk::from(|| thread::sleep(LONG_TASK))); } thread::sleep(ONE_SEC); assert_eq!(hive.num_tasks().1, total_threads as u64); - let husk = hive.try_into_husk().unwrap(); + let husk = hive.try_into_husk(false).unwrap(); assert_eq!(husk.iter_successes().count(), total_threads); } - #[test] - fn test_suspend() { - let hive: ThunkHive<()> = Builder::new().num_threads(TEST_TASKS).build_with_default(); + #[rstest] + fn test_use_all_cores(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = void_thunk_hive(0, builder_factory(false)); + let num_cores = num_cpus::get(); + // queue some long-running tasks + for _ in 0..num_cores { + hive.apply_store(Thunk::from(|| thread::sleep(LONG_TASK))); + } + thread::sleep(ONE_SEC); + assert_eq!(hive.num_tasks().0, num_cores as u64); + assert_eq!(hive.use_all_cores().unwrap(), num_cores); + assert_eq!(hive.max_workers(), num_cores); + thread::sleep(ONE_SEC); + let husk = hive.try_into_husk(false).unwrap(); + assert_eq!(husk.iter_successes().count(), num_cores); + } + + #[rstest] + fn test_suspend(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = void_thunk_hive(TEST_TASKS, builder_factory(false)); // queue some long-running tasks let total_tasks = 2 * TEST_TASKS; for _ in 0..total_tasks { - hive.apply_store(Thunk::of(|| thread::sleep(SHORT_TASK))); + hive.apply_store(Thunk::from(|| thread::sleep(SHORT_TASK))); } thread::sleep(ONE_SEC); assert_eq!(hive.num_tasks(), (TEST_TASKS as u64, TEST_TASKS as u64)); @@ -637,7 +672,11 @@ mod tests { type Output = u8; type Error = (); - fn apply_ref(&mut self, input: &Self::Input, ctx: &Context) -> RefWorkerResult { + fn apply_ref( + &mut self, + input: &Self::Input, + ctx: &Context, + ) -> RefWorkerResult { for _ in 0..3 { thread::sleep(Duration::from_secs(1)); if ctx.is_cancelled() { @@ -648,16 +687,59 @@ mod tests { } } - #[test] - fn test_suspend_with_cancelled_tasks() { - let hive = Builder::new() + #[rstest] + fn test_suspend_resume_send_with_cancelled_tasks( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive: Hive<_, _> = builder_factory(false) .num_threads(TEST_TASKS) - .build_with_default::(); + .with_worker_default::() + .build(); + let _ = hive.swarm_store(0..TEST_TASKS as u8); + // wait for tasks to be started + thread::sleep(Duration::from_millis(500)); + assert_eq!(hive.num_tasks(), (0, TEST_TASKS as u64)); + hive.suspend(); + // wait for tasks to be cancelled + thread::sleep(Duration::from_secs(2)); + assert_eq!(hive.num_tasks(), (0, 0)); + assert_eq!(hive.num_unprocessed(), TEST_TASKS); + hive.resume(); + let (tx, rx) = super::outcome_channel(); + let new_task_ids = hive.swarm_unprocessed_send(tx); + assert_eq!(new_task_ids.len(), TEST_TASKS); + thread::sleep(Duration::from_millis(500)); + // unprocessed tasks 
should be requeued + assert_eq!(hive.num_tasks(), (0, TEST_TASKS as u64)); + hive.join(); + let mut outputs = rx + .into_iter() + .select_ordered_outputs(new_task_ids) + .collect::>(); + outputs.sort(); + assert_eq!(outputs, (0..TEST_TASKS as u8).collect::>()); + } + + #[rstest] + fn test_suspend_resume_store_with_cancelled_tasks( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive: Hive<_, _> = builder_factory(false) + .num_threads(TEST_TASKS) + .with_worker_default::() + .build(); hive.swarm_store(0..TEST_TASKS as u8); hive.suspend(); // wait for tasks to be cancelled thread::sleep(Duration::from_secs(2)); - hive.resume_store(); + hive.resume(); + hive.swarm_unprocessed_store(); thread::sleep(Duration::from_secs(1)); // unprocessed tasks should be requeued assert_eq!(hive.num_tasks().1, TEST_TASKS as u64); @@ -665,12 +747,19 @@ mod tests { assert_eq!(hive.num_successes(), TEST_TASKS); } - #[test] - fn test_num_tasks_active() { - let hive: ThunkHive<()> = Builder::new().num_threads(TEST_TASKS).build_with_default(); + #[rstest] + fn test_num_tasks_active( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = void_thunk_hive(TEST_TASKS, builder_factory(false)); for _ in 0..2 * TEST_TASKS { - hive.apply_store(Thunk::of(|| loop { - thread::sleep(LONG_TASK) + hive.apply_store(Thunk::from(|| { + loop { + thread::sleep(LONG_TASK) + } })); } thread::sleep(ONE_SEC); @@ -679,49 +768,65 @@ mod tests { assert_eq!(num_threads, TEST_TASKS); } - #[test] - fn test_all_threads() { - let hive = Builder::new() + #[rstest] + fn test_all_threads(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive: Hive>, _> = builder_factory(false) + .with_queen_default() .with_thread_per_core() - .build_with_default::>(); + .build(); let num_threads = num_cpus::get(); for _ in 0..num_threads { - hive.apply_store(Thunk::of(|| loop { - thread::sleep(LONG_TASK) + hive.apply_store(Thunk::from(|| { + loop { + thread::sleep(LONG_TASK) + } })); } thread::sleep(ONE_SEC); assert_eq!(hive.num_tasks().1, num_threads as u64); - let num_threads = hive.max_workers(); - assert_eq!(num_threads, num_threads); + let max_workers = hive.max_workers(); + assert_eq!(num_threads, max_workers); } - #[test] - fn test_panic() { - let hive = thunk_hive(TEST_TASKS); + #[rstest] + fn test_panic(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive(TEST_TASKS, builder_factory(true)); let (tx, _) = super::outcome_channel(); // Panic all the existing threads. 
         for _ in 0..TEST_TASKS {
-            hive.apply_send(Thunk::of(|| panic!("intentional panic")), tx.clone());
+            hive.apply_send(Thunk::from(|| panic!("intentional panic")), &tx);
         }
         hive.join();
         // Ensure that all of the tasks panicked
         assert_eq!(hive.num_panics(), TEST_TASKS);
-        let husk = hive.try_into_husk().unwrap();
+        let husk = hive.try_into_husk(false).unwrap();
         assert_eq!(husk.num_panics(), TEST_TASKS);
     }

-    #[test]
-    fn test_catch_panic() {
-        let hive = Builder::new()
-            .num_threads(TEST_TASKS)
-            .build_with(RefCaller::of(|_: &u8| -> Result {
+    #[rstest]
+    fn test_catch_panic(#[values(channel_builder, workstealing_builder)] builder_factory: F)
+    where
+        B: TaskQueuesBuilder,
+        F: Fn(bool) -> B,
+    {
+        let hive: Hive<_, _> = builder_factory(false)
+            .with_worker(RefCaller::from(|_: &u8| -> Result {
                 panic!("intentional panic")
-            }));
+            }))
+            .num_threads(TEST_TASKS)
+            .build();
        let (tx, rx) = super::outcome_channel();
         // Panic all the existing threads.
         for i in 0..TEST_TASKS {
-            hive.apply_send(i as u8, tx.clone());
+            hive.apply_send(i as u8, &tx);
         }
         hive.join();
         // Ensure that all of the tasks panicked (the panics are caught)
@@ -732,9 +837,14 @@
         }
     }

-    #[test]
-    fn test_should_not_panic_on_drop_if_subtasks_panic_after_drop() {
-        let hive: ThunkHive<()> = Builder::new().num_threads(TEST_TASKS).build_with_default();
+    #[rstest]
+    fn test_should_not_panic_on_drop_if_subtasks_panic_after_drop(
+        #[values(channel_builder, workstealing_builder)] builder_factory: F,
+    ) where
+        B: TaskQueuesBuilder,
+        F: Fn(bool) -> B,
+    {
+        let hive = void_thunk_hive(TEST_TASKS, builder_factory(false));
         let waiter = Arc::new(Barrier::new(TEST_TASKS + 1));
         let waiter_count = Arc::new(AtomicUsize::new(0));
@@ -742,7 +852,7 @@
         for _ in 0..TEST_TASKS {
             let waiter = waiter.clone();
             let waiter_count = waiter_count.clone();
-            hive.apply_store(Thunk::of(move || {
+            hive.apply_store(Thunk::from(move || {
                 waiter_count.fetch_add(1, Ordering::SeqCst);
                 waiter.wait();
                 panic!("intentional panic");
@@ -761,11 +871,16 @@
         waiter.wait();
     }

-    #[test]
-    fn test_massive_task_creation() {
+    #[rstest]
+    fn test_massive_task_creation(
+        #[values(channel_builder, workstealing_builder)] builder_factory: F,
+    ) where
+        B: TaskQueuesBuilder,
+        F: Fn(bool) -> B,
+    {
         let test_tasks = 4_200_000;

-        let hive = thunk_hive(TEST_TASKS);
+        let hive = thunk_hive(TEST_TASKS, builder_factory(true));
         let b0 = IndexedBarrier::new(TEST_TASKS);
         let b1 = IndexedBarrier::new(TEST_TASKS);
@@ -775,7 +890,7 @@
             let tx = tx.clone();
             let (b0, b1) = (b0.clone(), b1.clone());

-            hive.apply_store(Thunk::of(move || {
+            hive.apply_store(Thunk::from(move || {
                 // Wait until the pool has been filled once.
b0.wait(); // wait so the pool can be measured @@ -799,19 +914,24 @@ mod tests { ); } - #[test] - fn test_name() { + #[rstest] + fn test_name(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { let name = "test"; - let hive = Builder::new() + let hive: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_queen_default() .thread_name(name.to_owned()) .num_threads(2) - .build_with_default::>(); + .build(); let (tx, rx) = mpsc::channel(); // initial thread should share the name "test" for _ in 0..2 { let tx = tx.clone(); - hive.apply_store(Thunk::of(move || { + hive.apply_store(Thunk::from(move || { let name = thread::current().name().unwrap().to_owned(); tx.send(name).unwrap(); })); @@ -820,7 +940,7 @@ mod tests { // new spawn thread should share the name "test" too. hive.grow(3).expect("error spawning threads"); let tx_clone = tx.clone(); - hive.apply_store(Thunk::of(move || { + hive.apply_store(Thunk::from(move || { let name = thread::current().name().unwrap().to_owned(); tx_clone.send(name).unwrap(); })); @@ -830,17 +950,22 @@ mod tests { } } - #[test] - fn test_stack_size() { + #[rstest] + fn test_stack_size(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { let stack_size = 4_000_000; - let hive = Builder::new() + let hive: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_queen_default() .num_threads(1) .thread_stack_size(stack_size) - .build_with_default::>(); + .build(); let actual_stack_size = hive - .apply(Thunk::of(|| { + .apply(Thunk::from(|| { //println!("This thread has a 4 MB stack size!"); stacker::remaining_stack().unwrap() })) @@ -851,46 +976,57 @@ mod tests { assert!(actual_stack_size < (stack_size as f64 * 1.01)); } - #[test] - fn test_debug() { - let hive = thunk_hive::<()>(4); + #[rstest] + fn test_debug(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = void_thunk_hive(4, builder_factory(true)); let debug = format!("{:?}", hive); assert_eq!( debug, - "Hive { task_tx: Sender { .. }, shared: Shared { name: None, num_threads: 4, num_tasks_queued: 0, num_tasks_active: 0 } }" + "Hive(Some(Shared { name: None, num_threads: 4, num_tasks_queued: 0, num_tasks_active: 0 }))" ); - let hive = Builder::new() + let hive: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_queen_default() .thread_name("hello") .num_threads(4) - .build_with_default::>(); + .build(); let debug = format!("{:?}", hive); assert_eq!( debug, - "Hive { task_tx: Sender { .. }, shared: Shared { name: \"hello\", num_threads: 4, num_tasks_queued: 0, num_tasks_active: 0 } }" + "Hive(Some(Shared { name: \"hello\", num_threads: 4, num_tasks_queued: 0, num_tasks_active: 0 }))" ); - let hive = thunk_hive(4); - hive.apply_store(Thunk::of(|| thread::sleep(LONG_TASK))); + let hive = thunk_hive(4, builder_factory(true)); + hive.apply_store(Thunk::from(|| thread::sleep(LONG_TASK))); thread::sleep(ONE_SEC); let debug = format!("{:?}", hive); assert_eq!( debug, - "Hive { task_tx: Sender { .. 
}, shared: Shared { name: None, num_threads: 4, num_tasks_queued: 0, num_tasks_active: 1 } }" + "Hive(Some(Shared { name: None, num_threads: 4, num_tasks_queued: 0, num_tasks_active: 1 }))" ); } - #[test] - fn test_repeated_join() { - let hive = Builder::new() + #[rstest] + fn test_repeated_join(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_queen_default() .thread_name("repeated join test") .num_threads(8) - .build_with_default::>(); + .build(); + let test_count = Arc::new(AtomicUsize::new(0)); for _ in 0..42 { let test_count = test_count.clone(); - hive.apply_store(Thunk::of(move || { + hive.apply_store(Thunk::from(move || { thread::sleep(SHORT_TASK); test_count.fetch_add(1, Ordering::Release); })); @@ -901,7 +1037,7 @@ mod tests { for _ in 0..42 { let test_count = test_count.clone(); - hive.apply_store(Thunk::of(move || { + hive.apply_store(Thunk::from(move || { thread::sleep(SHORT_TASK); test_count.fetch_add(1, Ordering::Relaxed); })); @@ -910,8 +1046,12 @@ mod tests { assert_eq!(84, test_count.load(Ordering::Relaxed)); } - #[test] - fn test_multi_join() { + #[rstest] + fn test_multi_join(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { // Toggle the following lines to debug the deadlock // fn error(_s: String) { // use ::std::io::Write; @@ -922,22 +1062,24 @@ mod tests { // .expect("Failed to write to stderr"); // } - let hive0 = Builder::new() + let hive0: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_queen_default() .thread_name("multi join pool0") .num_threads(4) - .build_with_default::>(); - let hive1 = Builder::new() + .build(); + let hive1: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_queen_default() .thread_name("multi join pool1") .num_threads(4) - .build_with_default::>(); + .build(); let (tx, rx) = crate::channel::channel(); for i in 0..8 { let hive1_clone = hive1.clone(); let hive0_clone = hive0.clone(); let tx = tx.clone(); - hive0.apply_store(Thunk::of(move || { - hive1_clone.apply_store(Thunk::of(move || { + hive0.apply_store(Thunk::from(move || { + hive1_clone.apply_store(Thunk::from(move || { //error(format!("p1: {} -=- {:?}\n", i, hive0_clone)); hive0_clone.join(); // ensure that the main thread has a chance to execute @@ -961,33 +1103,43 @@ mod tests { assert_eq!(rx.into_iter().sum::(), (0..8).sum()); } - #[test] - fn test_empty_hive() { + #[rstest] + fn test_empty_hive(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { // Joining an empty hive must return imminently - let hive = thunk_hive::<()>(4); + // TODO: run this in a thread and kill it after a timeout to prevent hanging the tests + let hive = void_thunk_hive(4, builder_factory(true)); hive.join(); } - #[test] - fn test_no_fun_or_joy() { + #[rstest] + fn test_no_fun_or_joy(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { // What happens when you keep adding tasks after a join fn sleepy_function() { thread::sleep(LONG_TASK); } - let hive = Builder::new() + let hive: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_queen_default() .thread_name("no fun or joy") .num_threads(8) - .build_with_default::>(); + .build(); - hive.apply_store(Thunk::of(sleepy_function)); + hive.apply_store(Thunk::from(sleepy_function)); 
let p_t = hive.clone(); thread::spawn(move || { (0..23) .inspect(|_| { - p_t.apply_store(Thunk::of(sleepy_function)); + p_t.apply_store(Thunk::from(sleepy_function)); }) .count(); }); @@ -995,14 +1147,16 @@ mod tests { hive.join(); } - #[test] - fn test_map() { - let hive = Builder::new() - .num_threads(2) - .build_with_default::>(); + #[rstest] + fn test_map(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(2, builder_factory(false)); let outputs: Vec<_> = hive .map((0..10u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((10 - i as u64) * 100)); i }) @@ -1012,58 +1166,66 @@ mod tests { assert_eq!(outputs, (0..10).collect::>()) } - #[test] - fn test_map_unordered() { - let hive = Builder::new() - .num_threads(8) - .build_with_default::>(); - let outputs: Vec<_> = hive + #[rstest] + fn test_map_unordered(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(8, builder_factory(false)); + let mut outputs: Vec<_> = hive .map_unordered((0..8u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((8 - i as u64) * 100)); i }) })) .map(Outcome::unwrap) .collect(); - assert_eq!(outputs, (0..8).rev().collect::>()) + outputs.sort(); + assert_eq!(outputs, (0..8).collect::>()) } - #[test] - fn test_map_send() { - let hive = Builder::new() - .num_threads(8) - .build_with_default::>(); + #[rstest] + fn test_map_send(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(8, builder_factory(false)); let (tx, rx) = super::outcome_channel(); let mut task_ids = hive.map_send( (0..8u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((8 - i as u64) * 100)); i }) }), tx, ); - let (mut outcome_task_ids, values): (Vec, Vec) = rx + let (mut outcome_task_ids, mut values): (Vec, Vec) = rx .iter() .map(|outcome| match outcome { Outcome::Success { value, task_id } => (task_id, value), _ => panic!("unexpected error"), }) .unzip(); - assert_eq!(values, (0..8).rev().collect::>()); task_ids.sort(); outcome_task_ids.sort(); assert_eq!(task_ids, outcome_task_ids); + values.sort(); + assert_eq!(values, (0..8).collect::>()); } - #[test] - fn test_map_store() { - let mut hive = Builder::new() - .num_threads(8) - .build_with_default::>(); + #[rstest] + fn test_map_store(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let mut hive = thunk_hive::(8, builder_factory(false)); let mut task_ids = hive.map_store((0..8u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((8 - i as u64) * 100)); i }) @@ -1083,14 +1245,16 @@ mod tests { assert_eq!(task_ids, outcome_task_ids); } - #[test] - fn test_swarm() { - let hive = Builder::new() - .num_threads(2) - .build_with_default::>(); + #[rstest] + fn test_swarm(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(2, builder_factory(false)); let outputs: Vec<_> = hive .swarm((0..10u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((10 - i as u64) * 100)); i }) @@ -1100,33 +1264,41 @@ mod tests { assert_eq!(outputs, 
(0..10).collect::>()) } - #[test] - fn test_swarm_unordered() { - let hive = Builder::new() - .num_threads(8) - .build_with_default::>(); - let outputs: Vec<_> = hive + #[rstest] + fn test_swarm_unordered( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(8, builder_factory(false)); + let mut outputs: Vec<_> = hive .swarm_unordered((0..8u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((8 - i as u64) * 100)); i }) })) .map(Outcome::unwrap) .collect(); - assert_eq!(outputs, (0..8).rev().collect::>()) + outputs.sort(); + assert_eq!(outputs, (0..8).collect::>()) } - #[test] - fn test_swarm_send() { - let hive = Builder::new() - .num_threads(8) - .build_with_default::>(); + #[rstest] + fn test_swarm_send(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(8, builder_factory(false)); + #[cfg(feature = "local-batch")] + assert_eq!(hive.worker_batch_limit(), 0); let (tx, rx) = super::outcome_channel(); let mut task_ids = hive.swarm_send( (0..8u8).map(|i| { - Thunk::of(move || { - thread::sleep(Duration::from_millis((8 - i as u64) * 100)); + Thunk::from(move || { + thread::sleep(Duration::from_millis((8 - i as u64) * 200)); i }) }), @@ -1145,13 +1317,15 @@ mod tests { assert_eq!(task_ids, outcome_task_ids); } - #[test] - fn test_swarm_store() { - let mut hive = Builder::new() - .num_threads(8) - .build_with_default::>(); + #[rstest] + fn test_swarm_store(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let mut hive = thunk_hive::(8, builder_factory(false)); let mut task_ids = hive.swarm_store((0..8u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((8 - i as u64) * 100)); i }) @@ -1171,12 +1345,17 @@ mod tests { assert_eq!(task_ids, outcome_task_ids); } - #[test] - fn test_scan() { - let hive = Builder::new() + #[rstest] + fn test_scan(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(Caller::from(|i: usize| i * i)) .num_threads(4) - .build_with(Caller::of(|i| i * i)); - let (outputs, state) = hive.scan(0..10, 0, |acc, i| { + .build(); + let (outputs, state) = hive.scan(0..10usize, 0, |acc, i| { *acc += i; *acc }); @@ -1196,11 +1375,16 @@ mod tests { ); } - #[test] - fn test_scan_send() { - let hive = Builder::new() + #[rstest] + fn test_scan_send(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(Caller::from(|i: i32| i * i)) .num_threads(4) - .build_with(Caller::of(|i| i * i)); + .build(); let (tx, rx) = super::outcome_channel(); let (mut task_ids, state) = hive.scan_send(0..10, tx, 0, |acc, i| { *acc += i; @@ -1231,11 +1415,75 @@ mod tests { assert_eq!(task_ids, outcome_task_ids); } - #[test] - fn test_try_scan_send() { - let hive = Builder::new() + #[rstest] + fn test_try_scan(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(Caller::from(|i: i32| i * i)) .num_threads(4) - .build_with(Caller::of(|i| i * i)); + .build(); + let (outcomes, error, state) = 
hive.try_scan(0..10, 0, |acc, i| { + *acc += i; + Ok::<_, String>(*acc) + }); + let task_ids: Vec<_> = outcomes.success_task_ids(); + assert_eq!(task_ids.len(), 10); + assert_eq!(error.len(), 0); + assert_eq!(state, 45); + let mut values: Vec<_> = outcomes + .into_iter() + .select_unordered(task_ids) + .into_outputs() + .collect(); + values.sort(); + assert_eq!( + values, + (0..10) + .scan(0, |acc, i| { + *acc += i; + Some(*acc) + }) + .map(|i| i * i) + .collect::>() + ); + } + + #[rstest] + #[should_panic] + fn test_try_scan_fail(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(Caller::from(|i: i32| i * i)) + .num_threads(4) + .build(); + let (outcomes, error, state) = hive.try_scan(0..10, 0, |_, _| Err::("fail")); + let task_ids: Vec<_> = outcomes.success_task_ids(); + assert_eq!(task_ids.len(), 10); + assert_eq!(error.len(), 0); + assert_eq!(state, 45); + let _ = outcomes + .into_iter() + .select_unordered(task_ids) + .into_outputs() + .collect::>(); + } + + #[rstest] + fn test_try_scan_send(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(Caller::from(|i: i32| i * i)) + .num_threads(4) + .build(); let (tx, rx) = super::outcome_channel(); let (results, state) = hive.try_scan_send(0..10, tx, 0, |acc, i| { *acc += i; @@ -1267,26 +1515,37 @@ mod tests { assert_eq!(task_ids, outcome_task_ids); } - #[test] + #[rstest] #[should_panic] - fn test_try_scan_send_fail() { - let hive = Builder::new() + fn test_try_scan_send_fail( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(OnceCaller::from(|i: i32| Ok::<_, String>(i * i))) .num_threads(4) - .build_with(OnceCaller::of(|i: i32| Ok::<_, String>(i * i))); + .build(); let (tx, _) = super::outcome_channel(); let _ = hive - .try_scan_send(0..10, tx, 0, |_, _| Err("fail")) + .try_scan_send(0..10, &tx, 0, |_, _| Err::("fail")) .0 .into_iter() .map(Result::unwrap) .collect::>(); } - #[test] - fn test_scan_store() { - let mut hive = Builder::new() + #[rstest] + fn test_scan_store(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let mut hive = builder_factory(false) + .with_worker(Caller::from(|i: i32| i * i)) .num_threads(4) - .build_with(Caller::of(|i| i * i)); + .build(); let (mut task_ids, state) = hive.scan_store(0..10, 0, |acc, i| { *acc += i; *acc @@ -1317,11 +1576,17 @@ mod tests { assert_eq!(task_ids, outcome_task_ids); } - #[test] - fn test_try_scan_store() { - let mut hive = Builder::new() + #[rstest] + fn test_try_scan_store( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let mut hive = builder_factory(false) + .with_worker(Caller::from(|i: i32| i * i)) .num_threads(4) - .build_with(Caller::of(|i| i * i)); + .build(); let (results, state) = hive.try_scan_store(0..10, 0, |acc, i| { *acc += i; Ok::(*acc) @@ -1353,28 +1618,106 @@ mod tests { assert_eq!(task_ids, outcome_task_ids); } - #[test] + #[rstest] #[should_panic] - fn test_try_scan_store_fail() { - let hive = Builder::new() + fn test_try_scan_store_fail( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: 
TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(OnceCaller::from(|i: i32| Ok::(i * i))) .num_threads(4) - .build_with(OnceCaller::of(|i: i32| Ok::(i * i))); + .build(); let _ = hive - .try_scan_store(0..10, 0, |_, _| Err("fail")) + .try_scan_store(0..10, 0, |_, _| Err::("fail")) .0 .into_iter() .map(Result::unwrap) .collect::>(); } - #[test] - fn test_husk() { - let hive1 = Builder::new() - .num_threads(8) - .build_with_default::>(); - let task_ids = hive1.map_store((0..8u8).map(|i| Thunk::of(move || i))); + const NUM_FIRST_TASKS: usize = 4; + + #[derive(Debug, Default)] + struct SendWorker; + + impl Worker for SendWorker { + type Input = usize; + type Output = usize; + type Error = (); + + fn apply(&mut self, input: Self::Input, ctx: &Context) -> WorkerResult { + if input < NUM_FIRST_TASKS { + ctx.submit(input + NUM_FIRST_TASKS) + .map_err(|input| ApplyError::Retryable { input, error: () })?; + } + Ok(input) + } + } + + #[rstest] + fn test_send_from_task( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .num_threads(2) + .with_worker_default::() + .build(); + let (tx, rx) = super::outcome_channel(); + let task_ids = hive.map_send(0..NUM_FIRST_TASKS, tx); + hive.join(); + // each task submits another task + assert_eq!(task_ids.len(), NUM_FIRST_TASKS); + let outputs: Vec<_> = rx.select_ordered_outputs(task_ids).collect(); + assert_eq!(outputs.len(), NUM_FIRST_TASKS * 2); + assert_eq!(outputs, (0..NUM_FIRST_TASKS * 2).collect::>()); + } + + #[rstest] + fn test_close(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive1 = thunk_hive::(8, builder_factory(false)); + let _ = hive1.map_store((0..8u8).map(|i| Thunk::from(move || i))); + hive1.join(); + let hive2 = hive1.clone(); + assert!(!hive1.close(false)); + assert!(hive2.close(false)); + } + + #[rstest] + fn test_into_outcomes(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = thunk_hive::(8, builder_factory(false)); + let task_ids = hive.map_store((0..8u8).map(|i| Thunk::from(move || i))); + hive.join(); + let outcomes = hive.try_into_outcomes(false).unwrap(); + for i in task_ids.iter() { + assert!(outcomes.get(i).unwrap().is_success()); + assert!(matches!(outcomes.get(i), Some(Outcome::Success { .. }))); + } + } + + #[rstest] + fn test_husk(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive1 = thunk_hive::(8, builder_factory(false)); + let task_ids = hive1.map_store((0..8u8).map(|i| Thunk::from(move || i))); hive1.join(); - let mut husk1 = hive1.try_into_husk().unwrap(); + let mut husk1 = hive1.try_into_husk(false).unwrap(); for i in task_ids.iter() { assert!(husk1.outcomes_deref().get(i).unwrap().is_success()); assert!(matches!(husk1.get(*i), Some(Outcome::Success { .. 
}))); @@ -1383,15 +1726,17 @@ mod tests { let builder = husk1.as_builder(); let hive2 = builder .num_threads(4) - .build_with_default::>(); + .with_worker_default::>() + .with_channel_queues() + .build(); hive2.map_store((0..8u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((8 - i as u64) * 100)); i }) })); hive2.join(); - let mut husk2 = hive2.try_into_husk().unwrap(); + let mut husk2 = hive2.try_into_husk(false).unwrap(); let mut outputs1 = husk1 .remove_all() @@ -1407,15 +1752,15 @@ mod tests { outputs2.sort(); assert_eq!(outputs1, outputs2); - let hive3 = husk1.into_hive(); + let hive3 = husk1.into_hive::>(); hive3.map_store((0..8u8).map(|i| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis((8 - i as u64) * 100)); i }) })); hive3.join(); - let husk3 = hive3.try_into_husk().unwrap(); + let husk3 = hive3.try_into_husk(false).unwrap(); let (_, outcomes3) = husk3.into_parts(); let mut outputs3 = outcomes3 .into_iter() @@ -1425,16 +1770,21 @@ mod tests { assert_eq!(outputs1, outputs3); } - #[test] - fn test_clone() { - let hive = Builder::new() + #[rstest] + fn test_clone(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_worker_default() .thread_name("clone example") .num_threads(2) - .build_with_default::>(); + .build(); // This batch of tasks will occupy the pool for some time for _ in 0..6 { - hive.apply_store(Thunk::of(|| { + hive.apply_store(Thunk::from(|| { thread::sleep(SHORT_TASK); })); } @@ -1449,7 +1799,7 @@ mod tests { let (tx, rx) = mpsc::channel(); for i in 0..42 { let tx = tx.clone(); - hive.apply_store(Thunk::of(move || { + hive.apply_store(Thunk::from(move || { tx.send(i).expect("channel will be waiting"); })); } @@ -1466,7 +1816,7 @@ mod tests { let (tx, rx) = mpsc::channel(); for i in 1..12 { let tx = tx.clone(); - pool.apply_store(Thunk::of(move || { + pool.apply_store(Thunk::from(move || { tx.send(i).expect("channel will be waiting"); })); } @@ -1487,22 +1837,47 @@ mod tests { ); } - type VoidThunkWorker = ThunkWorker<()>; - type VoidThunkWorkerHive = Hive>; + #[rstest] + fn test_clone_into_husk_fails( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive1: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_worker_default() + .num_threads(2) + .build(); + let hive2 = hive1.clone(); + // should return None the first time since there is more than one reference + assert!(hive1.try_into_husk(false).is_none()); + // hive1 has been dropped, so we're down to 1 reference and it should succeed + assert!(hive2.try_into_husk(false).is_some()); + } - #[test] - fn test_send() { + #[rstest] + fn test_channel_hive_send() { fn assert_send() {} - assert_send::(); + assert_send::>, ChannelTaskQueues<_>>>(); } - #[test] - fn test_cloned_eq() { - let a = thunk_hive::<()>(2); + #[rstest] + fn test_workstealing_hive_send() { + fn assert_send() {} + assert_send::>, WorkstealingTaskQueues<_>>>(); + } + + #[rstest] + fn test_cloned_eq(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let a = thunk_hive::<(), _, _>(2, builder_factory(true)); assert_eq!(a, a.clone()); } - #[test] + #[rstest] /// When a thread joins on a pool, it blocks until all tasks have completed. 
If a second thread /// adds tasks to the pool and then joins before all the tasks have completed, both threads /// will wait for all tasks to complete. However, as soon as all tasks have completed, all @@ -1511,16 +1886,21 @@ mod tests { /// changed and will wake, even if new tasks have been added in the meantime. /// /// In this example, this means the waiting threads will exit the join in groups of four - /// because the waiter pool has four processes. + /// because the waiter pool has four processes + /// + /// TODO: make this test work with WorkstealingTaskQueues. fn test_join_wavesurfer() { let n_waves = 4; let n_workers = 4; let (tx, rx) = mpsc::channel(); - let builder = Builder::new() + let builder = channel_builder(false) .num_threads(n_workers) .thread_name("join wavesurfer"); - let waiter_hive = builder.clone().build_with_default::>(); - let clock_hive = builder.build_with_default::>(); + let waiter_hive = builder + .clone() + .with_worker_default::>() + .build(); + let clock_hive = builder.with_worker_default::>().build(); let barrier = Arc::new(Barrier::new(3)); let wave_counter = Arc::new(AtomicUsize::new(0)); @@ -1538,7 +1918,7 @@ mod tests { { let barrier = barrier.clone(); - clock_hive.apply_store(Thunk::of(move || { + clock_hive.apply_store(Thunk::from(move || { barrier.wait(); // this sleep is for stabilisation on weaker platforms thread::sleep(Duration::from_millis(100)); @@ -1550,11 +1930,11 @@ mod tests { let tx = tx.clone(); let clock_hive = clock_hive.clone(); let wave_counter = wave_counter.clone(); - waiter_hive.apply_store(Thunk::of(move || { + waiter_hive.apply_store(Thunk::from(move || { let wave_before = wave_counter.load(Ordering::SeqCst); clock_hive.join(); // submit tasks for the next wave - clock_hive.apply_store(Thunk::of(|| thread::sleep(ONE_SEC))); + clock_hive.apply_store(Thunk::from(|| thread::sleep(ONE_SEC))); let wave_after = wave_counter.load(Ordering::SeqCst); tx.send((wave_before, wave_after, worker)).unwrap(); })); @@ -1598,30 +1978,39 @@ mod tests { // cargo-llvm-cov doesn't yet support doctests in stable, so we need to duplicate them in // unit tests to get coverage - #[test] - fn doctest_lib_2() { + #[rstest] + fn doctest_lib_2(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { // create a hive to process `Thunk`s - no-argument closures with the same return type (`i32`) - let hive = Builder::new() + let hive: Hive>, B::TaskQueues<_>> = builder_factory(false) + .with_worker_default() .num_threads(4) .thread_name("thunk_hive") - .build_with_default::>(); + .build(); // return results to your own channel... let (tx, rx) = crate::hive::outcome_channel(); - let task_ids = hive.swarm_send((0..10).map(|i: i32| Thunk::of(move || i * i)), tx); + let task_ids = hive.swarm_send((0..10).map(|i: i32| Thunk::from(move || i * i)), &tx); let outputs: Vec<_> = rx.select_unordered_outputs(task_ids).collect(); assert_eq!(285, outputs.into_iter().sum()); // return results as an iterator... 
let outputs2: Vec<_> = hive - .swarm((0..10).map(|i: i32| Thunk::of(move || i * -i))) + .swarm((0..10).map(|i: i32| Thunk::from(move || i * -i))) .into_outputs() .collect(); assert_eq!(-285, outputs2.into_iter().sum()); } - #[test] - fn doctest_lib_3() { + #[rstest] + fn doctest_lib_3(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { #[derive(Debug)] struct CatWorker { stdin: ChildStdin, @@ -1652,7 +2041,7 @@ mod tests { type Output = String; type Error = io::Error; - fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { + fn apply(&mut self, input: Self::Input, _: &Context) -> WorkerResult { self.write_char(input).map_err(|error| ApplyError::Fatal { input: Some(input), error, @@ -1674,7 +2063,7 @@ mod tests { } } - impl Queen for CatQueen { + impl QueenMut for CatQueen { type Kind = CatWorker; fn create(&mut self) -> Self::Kind { @@ -1704,7 +2093,10 @@ mod tests { } // build the Hive - let hive = Builder::new().num_threads(4).build_default::(); + let hive = builder_factory(false) + .with_queen_mut_default::() + .num_threads(4) + .build(); // prepare inputs let inputs: Vec = (0..8).map(|i| 97 + i).collect(); @@ -1724,7 +2116,12 @@ mod tests { assert_eq!(output, b"abcdefgh"); // shutdown the hive, use the Queen to wait on child processes, and report errors - let (mut queen, _) = hive.try_into_husk().unwrap().into_parts(); + let mut queen = hive + .try_into_husk(false) + .unwrap() + .into_parts() + .0 + .into_inner(); let (wait_ok, wait_err): (Vec<_>, Vec<_>) = queen.wait_for_all().into_iter().partition(Result::is_ok); if !wait_err.is_empty() { @@ -1751,18 +2148,27 @@ mod tests { #[cfg(all(test, feature = "affinity"))] mod affinity_tests { use crate::bee::stock::{Thunk, ThunkWorker}; - use crate::hive::Builder; + use crate::channel::{Message, ReceiverExt}; + use crate::hive::{Builder, Outcome, TaskQueuesBuilder, channel_builder, workstealing_builder}; + use rstest::*; + use std::thread; + use std::time::Duration; - #[test] - fn test_affinity() { - let hive = Builder::new() + #[rstest] + fn test_affinity(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) .thread_name("affinity example") .num_threads(2) .core_affinity(0..2) - .build_with_default::>(); + .with_worker_default::>() + .build(); hive.map_store((0..10).map(move |i| { - Thunk::of(move || { + Thunk::from(move || { if let Some(affininty) = core_affinity::get_core_ids() { eprintln!("task {} on thread with affinity {:?}", i, affininty); } @@ -1770,16 +2176,72 @@ mod affinity_tests { })); } - #[test] - fn test_use_all_cores() { - let hive = Builder::new() + #[rstest] + fn test_use_all_cores_builder( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) .thread_name("affinity example") .with_thread_per_core() .with_default_core_affinity() - .build_with_default::>(); + .with_worker_default::>() + .build(); hive.map_store((0..num_cpus::get()).map(move |i| { - Thunk::of(move || { + Thunk::from(move || { + if let Some(affininty) = core_affinity::get_core_ids() { + eprintln!("task {} on thread with affinity {:?}", i, affininty); + } + }) + })); + } + + #[rstest] + fn test_grow_with_affinity( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = 
builder_factory(false) + .thread_name("affinity example") + .with_default_core_affinity() + .with_worker_default::>() + .build(); + // check that with 0 threads no tasks are scheduled + let (tx, rx) = super::outcome_channel(); + let _ = hive.apply_send(Thunk::from(|| 0), &tx); + thread::sleep(Duration::from_secs(1)); + assert_eq!(hive.num_tasks().0, 1); + assert!(matches!(rx.try_recv_msg(), Message::ChannelEmpty)); + assert!(matches!(hive.grow_with_affinity(0, vec![]), Ok(0))); + thread::sleep(Duration::from_secs(1)); + assert_eq!(hive.num_tasks().0, 1); + assert!(matches!(hive.grow_with_affinity(1, vec![0]), Ok(1))); + thread::sleep(Duration::from_secs(1)); + assert_eq!(hive.num_tasks().0, 0); + assert!(matches!( + rx.try_recv_msg(), + Message::Received(Outcome::Success { value: 0, .. }) + )); + } + + #[rstest] + fn test_use_all_cores_hive() { + let hive = crate::hive::channel_builder(false) + .thread_name("affinity example") + .with_default_core_affinity() + .with_worker_default::>() + .build(); + + let num_cores = num_cpus::get(); + assert_eq!(hive.use_all_cores_with_affinity().unwrap(), num_cores); + + hive.map_store((0..num_cpus::get()).map(move |i| { + Thunk::from(move || { + if let Some(affinity) = core_affinity::get_core_ids() { + eprintln!("task {} on thread with affinity {:?}", i, affinity); + } + }) + })); @@ -1788,18 +2250,22 @@ mod affinity_tests { } } -#[cfg(all(test, feature = "batching"))] -mod batching_tests { +#[cfg(all(test, feature = "local-batch"))] +mod local_batch_tests { use crate::barrier::IndexedBarrier; - use crate::bee::stock::{Thunk, ThunkWorker}; use crate::bee::DefaultQueen; - use crate::hive::{Builder, Hive, OutcomeIteratorExt, OutcomeReceiver, OutcomeSender}; + use crate::bee::stock::{Thunk, ThunkWorker}; + use crate::hive::{ + Builder, Hive, OutcomeIteratorExt, OutcomeReceiver, OutcomeSender, TaskQueues, + TaskQueuesBuilder, channel_builder, workstealing_builder, + }; + use rstest::*; use std::collections::HashMap; use std::thread::{self, ThreadId}; use std::time::Duration; - fn launch_tasks( - hive: &Hive, DefaultQueen>>, + fn launch_tasks>>( + hive: &Hive>, T>, num_threads: usize, num_tasks_per_thread: usize, barrier: &IndexedBarrier, @@ -1811,12 +2277,12 @@ mod batching_tests { .map(|_| { let barrier = barrier.clone(); let task_id = hive.apply_send( - Thunk::of(move || { + Thunk::from(move || { barrier.wait(); thread::sleep(Duration::from_millis(100)); thread::current().id() }), - tx.clone(), + tx, ); thread::sleep(Duration::from_millis(100)); task_id @@ -1825,12 +2291,12 @@ mod batching_tests { // send the rest all at once let rest_task_ids = hive.map_send( (num_threads..total_tasks).map(|_| { - Thunk::of(move || { + Thunk::from(move || { thread::sleep(Duration::from_millis(1)); thread::current().id() }) }), - tx.clone(), + tx, ); init_task_ids.into_iter().chain(rest_task_ids).collect() } @@ -1846,15 +2312,16 @@ mod batching_tests { }) } - fn run_test( - hive: &Hive, DefaultQueen>>, + fn run_test>>( + hive: &Hive>, T>, num_threads: usize, - batch_size: usize, + batch_limit: usize, + assert_exact: bool, ) { - let tasks_per_thread = batch_size + 2; + let tasks_per_thread = batch_limit + 2; let (tx, rx) = crate::hive::outcome_channel(); - // each worker should take `batch_size` tasks for its queue + 1 to work on immediately, - // meaning there should be `batch_size + 1` tasks associated with each thread ID + // each worker should take `batch_limit` tasks for its queue + 1 to work on immediately, + // meaning there should be `batch_limit + 1` tasks associated with each
thread ID let barrier = IndexedBarrier::new(num_threads); let task_ids = launch_tasks(hive, num_threads, tasks_per_thread, &barrier, &tx); // start the first tasks @@ -1863,76 +2330,369 @@ mod batching_tests { hive.join(); let thread_counts = count_thread_ids(rx, task_ids); assert_eq!(thread_counts.len(), num_threads); - assert!(thread_counts - .values() - .all(|&count| count == tasks_per_thread)); + assert_eq!( + thread_counts.values().sum::(), + tasks_per_thread * num_threads + ); + if assert_exact { + assert!( + thread_counts + .values() + .all(|&count| count == tasks_per_thread) + ); + } else { + assert!(thread_counts.values().all(|&count| count > 0)); + } } - #[test] - fn test_batching() { + #[rstest] + fn test_local_batch_channel() { const NUM_THREADS: usize = 4; - const BATCH_SIZE: usize = 24; - let hive = Builder::new() + const BATCH_LIMIT: usize = 24; + let hive = channel_builder(false) + .with_worker_default() .num_threads(NUM_THREADS) - .batch_size(BATCH_SIZE) - .build_with_default::>(); - run_test(&hive, NUM_THREADS, BATCH_SIZE); + .batch_limit(BATCH_LIMIT) + .build(); + run_test(&hive, NUM_THREADS, BATCH_LIMIT, true); } - #[test] - fn test_set_batch_size() { + #[rstest] + fn test_local_batch_workstealing() { + const NUM_THREADS: usize = 4; + const BATCH_LIMIT: usize = 24; + let hive = workstealing_builder(false) + .with_worker_default() + .num_threads(NUM_THREADS) + .batch_limit(BATCH_LIMIT) + .build(); + run_test(&hive, NUM_THREADS, BATCH_LIMIT, false); + } + + #[rstest] + fn test_set_batch_limit_channel() { const NUM_THREADS: usize = 4; - const BATCH_SIZE_0: usize = 10; - const BATCH_SIZE_1: usize = 20; - const BATCH_SIZE_2: usize = 50; - let hive = Builder::new() + const BATCH_LIMIT_0: usize = 10; + const BATCH_LIMIT_1: usize = 50; + const BATCH_LIMIT_2: usize = 20; + let hive = channel_builder(false) + .with_worker_default() .num_threads(NUM_THREADS) - .batch_size(BATCH_SIZE_0) - .build_with_default::>(); - run_test(&hive, NUM_THREADS, BATCH_SIZE_0); + .batch_limit(BATCH_LIMIT_0) + .build(); + run_test(&hive, NUM_THREADS, BATCH_LIMIT_0, true); // increase batch size - hive.set_worker_batch_size(BATCH_SIZE_2); - run_test(&hive, NUM_THREADS, BATCH_SIZE_2); + hive.set_worker_batch_limit(BATCH_LIMIT_1); + run_test(&hive, NUM_THREADS, BATCH_LIMIT_1, true); // decrease batch size - hive.set_worker_batch_size(BATCH_SIZE_1); - run_test(&hive, NUM_THREADS, BATCH_SIZE_1); + hive.set_worker_batch_limit(BATCH_LIMIT_2); + run_test(&hive, NUM_THREADS, BATCH_LIMIT_2, true); } - #[test] - fn test_shrink_batch_size() { + #[rstest] + fn test_set_batch_limit_workstealing() { + const NUM_THREADS: usize = 4; + const BATCH_LIMIT_0: usize = 10; + const BATCH_LIMIT_1: usize = 50; + const BATCH_LIMIT_2: usize = 20; + let hive = workstealing_builder(false) + .with_worker_default() + .num_threads(NUM_THREADS) + .batch_limit(BATCH_LIMIT_0) + .build(); + run_test(&hive, NUM_THREADS, BATCH_LIMIT_0, false); + // increase batch size + hive.set_worker_batch_limit(BATCH_LIMIT_1); + run_test(&hive, NUM_THREADS, BATCH_LIMIT_1, false); + // decrease batch size + hive.set_worker_batch_limit(BATCH_LIMIT_2); + run_test(&hive, NUM_THREADS, BATCH_LIMIT_2, false); + } + + // TODO: make this work with WorkstealingTaskQueues + #[rstest] + fn test_shrink_batch_limit() { const NUM_THREADS: usize = 4; const NUM_TASKS_PER_THREAD: usize = 125; - const BATCH_SIZE_0: usize = 100; - const BATCH_SIZE_1: usize = 10; - let hive = Builder::new() + const BATCH_LIMIT_0: usize = 100; + const BATCH_LIMIT_1: usize = 10; + let hive = 
channel_builder(false) + .with_worker_default() .num_threads(NUM_THREADS) - .batch_size(BATCH_SIZE_0) - .build_with_default::>(); + .batch_limit(BATCH_LIMIT_0) + .build(); let (tx, rx) = crate::hive::outcome_channel(); let barrier = IndexedBarrier::new(NUM_THREADS); let task_ids = launch_tasks(&hive, NUM_THREADS, NUM_TASKS_PER_THREAD, &barrier, &tx); let total_tasks = NUM_THREADS * NUM_TASKS_PER_THREAD; assert_eq!(task_ids.len(), total_tasks); barrier.wait(); - hive.set_worker_batch_size(BATCH_SIZE_1); + hive.set_worker_batch_limit(BATCH_LIMIT_1); // The number of tasks completed by each thread could be variable, so we want to ensure - // that a) each processed at least `BATCH_SIZE_0` tasks, and b) there are a total of + // that a) each processed at least `BATCH_LIMIT_0` tasks, and b) there are a total of // `NUM_TASKS` outputs with no errors hive.join(); let thread_counts = count_thread_ids(rx, task_ids); - assert!(thread_counts.values().all(|count| *count > BATCH_SIZE_0)); + assert!(thread_counts.values().all(|count| *count > BATCH_LIMIT_0)); assert_eq!(thread_counts.values().sum::(), total_tasks); } + + #[test] + fn test_change_channel_batch_limit_nonempty() {} +} + +#[cfg(all(test, feature = "local-batch"))] +mod weighted_map_tests { + use crate::bee::stock::{RetryCaller, Thunk, ThunkWorker}; + use crate::bee::{ApplyError, Context}; + use crate::hive::{ + Builder, Outcome, OutcomeIteratorExt, TaskQueuesBuilder, Weighted, WeightedIteratorExt, + channel_builder, workstealing_builder, + }; + use rstest::*; + use std::collections::HashMap; + use std::thread; + use std::time::Duration; + + #[rstest] + fn test_map_weighted(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + const NUM_THREADS: usize = 4; + const BATCH_LIMIT: usize = 24; + let hive = builder_factory(false) + .with_worker_default::>() + .num_threads(NUM_THREADS) + .batch_limit(BATCH_LIMIT) + .build(); + let inputs = (0..10u8) + .map(|i| { + Thunk::from(move || { + thread::sleep(Duration::from_millis((10 - i as u64) * 100)); + i + }) + }) + .map(|thunk| (thunk, 0)) + .into_weighted(); + let outputs: Vec<_> = hive.map(inputs).map(Outcome::unwrap).collect(); + assert_eq!(outputs, (0..10).collect::>()) + } + + #[rstest] + fn test_map_weighted_with_limit( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + const NUM_THREADS: usize = 4; + const NUM_TASKS_PER_THREAD: usize = 3; + const NUM_TASKS: usize = NUM_THREADS * NUM_TASKS_PER_THREAD; + const BATCH_LIMIT: usize = 10; + const WEIGHT: u32 = 25; + const WEIGHT_LIMIT: u64 = WEIGHT as u64 * NUM_TASKS_PER_THREAD as u64; + // schedule NUM_THREADS * NUM_TASKS_PER_THREAD tasks, and set the weight limit at + // NUM_TASKS_PER_THREAD * task weight, such that, even though the batch limit is greater + // than NUM_TASKS_PER_THREAD, each thread should only take NUM_TASKS_PER_THREAD tasks; + // don't start any threads yet, as there can be a delay before the tasks are available + // that can lead to the first call to `try_pop` queuing a smaller than expected batch + let hive = builder_factory(false) + .with_worker(RetryCaller::from( + |i: u8, ctx: &Context| -> Result<(u8, Option), ApplyError> { + thread::sleep(Duration::from_millis(500)); + Ok((i, ctx.thread_index())) + }, + )) + .batch_limit(BATCH_LIMIT) + .weight_limit(WEIGHT_LIMIT) + .build(); + let inputs = (0..NUM_TASKS as u8).map(|i| (i, WEIGHT)).into_weighted(); + let (tx, rx) = crate::hive::outcome_channel(); + let task_ids = hive.map_send(inputs, tx); + // wait for tasks to be
scheduled + thread::sleep(Duration::from_secs(1)); + assert_eq!(hive.grow(NUM_THREADS).unwrap(), NUM_THREADS); + // wait for all tasks to complete + hive.join(); + let (mut outputs, thread_indices) = rx + .into_iter() + .select_unordered_outputs(task_ids) + .unzip::<_, _, Vec<_>, Vec<_>>(); + outputs.sort(); + assert_eq!(outputs, (0..NUM_TASKS as u8).collect::>()); + let counts = + thread_indices + .into_iter() + .flatten() + .fold(HashMap::new(), |mut counts, index| { + counts + .entry(index) + .and_modify(|count| *count += 1) + .or_insert(1); + counts + }); + assert!(counts.values().all(|&count| count == NUM_TASKS_PER_THREAD)); + } + + #[rstest] + fn test_overweight() { + const WEIGHT_LIMIT: u64 = 99; + let hive = channel_builder(false) + .with_worker_default::>() + .num_threads(1) + .weight_limit(WEIGHT_LIMIT) + .build(); + let outcome = hive.apply(Weighted::new(Thunk::from(|| 0), 100)); + assert!(matches!( + outcome, + Outcome::WeightLimitExceeded { weight: 100, .. } + )) + } + + #[rstest] + fn test_set_weight_limit() { + const WEIGHT_LIMIT: u64 = 99; + let hive = channel_builder(false) + .with_worker_default::>() + .num_threads(1) + .weight_limit(WEIGHT_LIMIT) + .build(); + assert_eq!(WEIGHT_LIMIT, hive.worker_weight_limit()); + let outcome = hive.apply(Weighted::new(Thunk::from(|| 0), WEIGHT_LIMIT + 1)); + assert!(matches!( + outcome, + Outcome::WeightLimitExceeded { weight: 100, .. } + )); + hive.set_worker_weight_limit(WEIGHT_LIMIT + 1); + assert_eq!(WEIGHT_LIMIT + 1, hive.worker_weight_limit()); + let outcome = hive.apply(Weighted::new(Thunk::from(|| 0), WEIGHT_LIMIT + 1)); + assert!(matches!(outcome, Outcome::Success { .. })); + } +} + +#[cfg(all(test, feature = "local-batch"))] +mod weighted_swarm_tests { + use crate::bee::stock::{EchoWorker, Thunk, ThunkWorker}; + use crate::hive::{ + Builder, Outcome, TaskQueuesBuilder, WeightedIteratorExt, channel_builder, + workstealing_builder, + }; + use rstest::*; + use std::thread; + use std::time::Duration; + + #[rstest] + fn test_swarm_weighted( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + const NUM_THREADS: usize = 4; + const BATCH_LIMIT: usize = 24; + let hive = builder_factory(false) + .with_worker_default::>() + .num_threads(NUM_THREADS) + .batch_limit(BATCH_LIMIT) + .build(); + let inputs = (0..10u8) + .map(|i| { + Thunk::from(move || { + thread::sleep(Duration::from_millis((10 - i as u64) * 100)); + i + }) + }) + .map(|thunk| (thunk, 0)) + .into_weighted_exact(); + let outputs: Vec<_> = hive.swarm(inputs).map(Outcome::unwrap).collect(); + assert_eq!(outputs, (0..10).collect::>()) + } + + #[rstest] + fn test_swarm_default_weighted( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + const NUM_THREADS: usize = 4; + const BATCH_LIMIT: usize = 24; + let hive = builder_factory(false) + .with_worker_default::>() + .num_threads(NUM_THREADS) + .batch_limit(BATCH_LIMIT) + .build(); + let inputs = (0..10u8) + .map(|i| { + Thunk::from(move || { + thread::sleep(Duration::from_millis((10 - i as u64) * 100)); + i + }) + }) + .into_default_weighted_exact(); + let outputs: Vec<_> = hive.swarm(inputs).map(Outcome::unwrap).collect(); + assert_eq!(outputs, (0..10).collect::>()) + } + + #[rstest] + fn test_swarm_const_weighted( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + const NUM_THREADS: usize = 4; + 
const BATCH_LIMIT: usize = 24; + let hive = builder_factory(false) + .with_worker_default::>() + .num_threads(NUM_THREADS) + .batch_limit(BATCH_LIMIT) + .build(); + let inputs = (0..10u8) + .map(|i| { + Thunk::from(move || { + thread::sleep(Duration::from_millis((10 - i as u64) * 100)); + i + }) + }) + .into_const_weighted_exact(0); + let outputs: Vec<_> = hive.swarm(inputs).map(Outcome::unwrap).collect(); + assert_eq!(outputs, (0..10).collect::>()) + } + + #[rstest] + fn test_swarm_identity_weighted( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + const NUM_THREADS: usize = 4; + const BATCH_LIMIT: usize = 24; + let hive = builder_factory(false) + .with_worker_default::>() + .num_threads(NUM_THREADS) + .batch_limit(BATCH_LIMIT) + .build(); + let inputs = (0..10u8).into_identity_weighted_exact(); + let outputs: Vec<_> = hive.swarm(inputs).map(Outcome::unwrap).collect(); + assert_eq!(outputs, (0..10).collect::>()) + } } #[cfg(all(test, feature = "retry"))] mod retry_tests { use crate::bee::stock::RetryCaller; use crate::bee::{ApplyError, Context}; - use crate::hive::{Builder, Outcome, OutcomeIteratorExt}; + use crate::hive::{ + Builder, Outcome, OutcomeIteratorExt, TaskQueuesBuilder, channel_builder, + workstealing_builder, + }; + use rstest::*; use std::time::{Duration, SystemTime}; - fn echo_time(i: usize, ctx: &Context) -> Result> { + fn echo_time(i: usize, ctx: &Context) -> Result> { let attempt = ctx.attempt(); if attempt == 3 { Ok("Success".into()) @@ -1946,21 +2706,33 @@ mod retry_tests { } } - #[test] - fn test_retries() { - let hive = Builder::new() + #[rstest] + fn test_retries(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(RetryCaller::from(echo_time)) .with_thread_per_core() .max_retries(3) .retry_factor(Duration::from_secs(1)) - .build_with(RetryCaller::of(echo_time)); + .build(); - let v: Result, _> = hive.swarm(0..10).into_results().collect(); + let v: Result, _> = hive.swarm(0..10usize).into_results().collect(); assert_eq!(v.unwrap().len(), 10); } - #[test] - fn test_retries_fail() { - fn sometimes_fail(i: usize, _: &Context) -> Result> { + #[rstest] + fn test_retries_fail(#[values(channel_builder, workstealing_builder)] builder_factory: F) + where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + fn sometimes_fail( + i: usize, + _: &Context, + ) -> Result> { match i % 3 { 0 => Ok("Success".into()), 1 => Err(ApplyError::Retryable { @@ -1975,12 +2747,13 @@ mod retry_tests { } } - let hive = Builder::new() + let hive = builder_factory(false) + .with_worker(RetryCaller::from(sometimes_fail)) .with_thread_per_core() .max_retries(3) - .build_with(RetryCaller::of(sometimes_fail)); + .build(); - let (success, retry_failed, not_retried) = hive.swarm(0..10).fold( + let (success, retry_failed, not_retried) = hive.swarm(0..10usize).fold( (0, 0, 0), |(success, retry_failed, not_retried), outcome| match outcome { Outcome::Success { .. 
} => (success + 1, retry_failed, not_retried), @@ -1995,13 +2768,48 @@ mod retry_tests { assert_eq!(not_retried, 3); } - #[test] - fn test_disable_retries() { - let hive = Builder::new() + #[rstest] + fn test_disable_retries( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(RetryCaller::from(echo_time)) + .with_thread_per_core() + .with_no_retries() + .build(); + let v: Result, _> = hive.swarm(0..10usize).into_results().collect(); + assert!(v.is_err()); + } + + #[rstest] + fn test_change_retry_limit( + #[values(channel_builder, workstealing_builder)] builder_factory: F, + ) where + B: TaskQueuesBuilder, + F: Fn(bool) -> B, + { + let hive = builder_factory(false) + .with_worker(RetryCaller::from(echo_time)) .with_thread_per_core() .with_no_retries() - .build_with(RetryCaller::of(echo_time)); - let v: Result, _> = hive.swarm(0..10).into_results().collect(); + .build(); + + assert_eq!(hive.worker_retry_limit(), 0); + assert_eq!(hive.worker_retry_factor(), Duration::from_secs(0)); + + let v: Result, _> = hive.swarm(0..10usize).into_results().collect(); assert!(v.is_err()); + + hive.set_worker_retry_limit(3); + hive.set_worker_retry_factor(Duration::from_secs(1)); + + assert_eq!(hive.worker_retry_limit(), 3); + assert_eq!(hive.worker_retry_factor(), Duration::from_secs(1)); + + let v: Result, _> = hive.swarm(0..10usize).into_results().collect(); + assert_eq!(v.unwrap().len(), 10); } } diff --git a/src/hive/outcome/batch.rs b/src/hive/outcome/batch.rs index af22e82..b42efff 100644 --- a/src/hive/outcome/batch.rs +++ b/src/hive/outcome/batch.rs @@ -1,9 +1,13 @@ use super::{DerefOutcomes, Outcome, OwnedOutcomes}; use crate::bee::{TaskId, Worker}; +use derive_more::Debug; +use std::any; use std::collections::HashMap; use std::ops::{Deref, DerefMut}; /// A batch of `Outcome`s. +#[derive(Debug)] +#[debug("OutcomeBatch<{}>", any::type_name::())] pub struct OutcomeBatch(HashMap>); impl OutcomeBatch { @@ -47,7 +51,9 @@ impl DerefOutcomes for OutcomeBatch { } } +/// Functions only used in testing. #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] impl OutcomeBatch { pub(crate) fn empty() -> Self { OutcomeBatch::new(HashMap::new()) diff --git a/src/hive/outcome/impl.rs b/src/hive/outcome/impl.rs new file mode 100644 index 0000000..79cf339 --- /dev/null +++ b/src/hive/outcome/impl.rs @@ -0,0 +1,510 @@ +use super::Outcome; +use crate::bee::{ApplyError, TaskId, TaskMeta, Worker, WorkerResult}; +use std::cmp::Ordering; +use std::mem; + +impl Outcome { + /// Converts a worker `result` into an `Outcome` with the given task_id and optional subtask ids. + pub(in crate::hive) fn from_worker_result( + result: WorkerResult, + task_meta: TaskMeta, + subtask_ids: Option>, + ) -> Self { + let task_id = task_meta.id(); + match (result, subtask_ids) { + (Ok(value), Some(subtask_ids)) => Self::SuccessWithSubtasks { + value, + task_id, + subtask_ids, + }, + (Ok(value), None) => Self::Success { value, task_id }, + (Err(ApplyError::Retryable { input, error, .. 
}), Some(subtask_ids)) => { + Self::FailureWithSubtasks { + input: Some(input), + error, + task_id, + subtask_ids, + } + } + (Err(ApplyError::Retryable { input, error }), None) => { + #[cfg(feature = "retry")] + { + Self::MaxRetriesAttempted { + input, + error, + task_id, + } + } + #[cfg(not(feature = "retry"))] + { + Self::Failure { + input: Some(input), + error, + task_id, + } + } + } + (Err(ApplyError::Fatal { input, error }), Some(subtask_ids)) => { + Self::FailureWithSubtasks { + input, + error, + task_id, + subtask_ids, + } + } + (Err(ApplyError::Fatal { input, error }), None) => Self::Failure { + input, + error, + task_id, + }, + (Err(ApplyError::Cancelled { input }), Some(subtask_ids)) => { + Self::UnprocessedWithSubtasks { + input, + task_id, + subtask_ids, + } + } + (Err(ApplyError::Cancelled { input }), None) => Self::Unprocessed { input, task_id }, + (Err(ApplyError::Panic { input, payload }), Some(subtask_ids)) => { + Self::PanicWithSubtasks { + input, + payload, + task_id, + subtask_ids, + } + } + (Err(ApplyError::Panic { input, payload }), None) => Self::Panic { + input, + payload, + task_id, + }, + } + } + + /// Creates a new `Outcome::Failure` from the given input, task metadata, and error. + #[cfg(feature = "retry")] + pub(in crate::hive) fn from_fatal( + input: W::Input, + task_meta: TaskMeta, + error: W::Error, + ) -> Self { + Self::Failure { + input: Some(input), + error, + task_id: task_meta.id(), + } + } + + /// Returns `true` if this is a `Success` outcome. + pub fn is_success(&self) -> bool { + matches!(self, Self::Success { .. }) + } + + /// Returns `true` if this outcome represents an unprocessed task input. + pub fn is_unprocessed(&self) -> bool { + matches!(self, Self::Unprocessed { .. }) + } + + /// Returns `true` if this outcome represents a task processing failure. + pub fn is_failure(&self) -> bool { + match self { + Self::Failure { .. } | Self::Panic { .. } => true, + #[cfg(feature = "retry")] + Self::MaxRetriesAttempted { .. } => true, + _ => false, + } + } + + /// Returns the task_id of the task that produced this outcome. + pub fn task_id(&self) -> &TaskId { + match self { + Self::Success { task_id, .. } + | Self::SuccessWithSubtasks { task_id, .. } + | Self::Failure { task_id, .. } + | Self::FailureWithSubtasks { task_id, .. } + | Self::Unprocessed { task_id, .. } + | Self::UnprocessedWithSubtasks { task_id, .. } + | Self::Missing { task_id } + | Self::Panic { task_id, .. } + | Self::PanicWithSubtasks { task_id, .. } => task_id, + #[cfg(feature = "local-batch")] + Self::WeightLimitExceeded { task_id, .. } => task_id, + #[cfg(feature = "retry")] + Self::MaxRetriesAttempted { task_id, .. } => task_id, + } + } + + /// Returns the IDs of the tasks submitted by the task that produced this outcome, or `None` + /// if the task did not submit any subtasks. + pub fn subtask_ids(&self) -> Option<&Vec> { + match self { + Self::SuccessWithSubtasks { subtask_ids, .. } + | Self::FailureWithSubtasks { subtask_ids, .. } + | Self::UnprocessedWithSubtasks { subtask_ids, .. } + | Self::PanicWithSubtasks { subtask_ids, .. } => Some(subtask_ids), + _ => None, + } + } + + /// Consumes this `Outcome` and returns the value if it is a `Success`, otherwise panics. + pub fn unwrap(self) -> W::Output { + match self { + Self::Success { value, .. } | Self::SuccessWithSubtasks { value, .. } => value, + outcome => panic!("Not a success outcome: {:?}", outcome), + } + } + + /// Consumes this `Outcome` and returns the output value if it is a `Success`, otherwise `None`.
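+ ///
+ /// # Example
+ ///
+ /// A minimal illustrative sketch (marked `ignore`; it assumes the stock `EchoWorker`,
+ /// whose output type equals its input type, and constructs the variant directly):
+ ///
+ /// ```ignore
+ /// use beekeeper::bee::stock::EchoWorker;
+ /// use beekeeper::hive::Outcome;
+ ///
+ /// let outcome: Outcome<EchoWorker<u8>> = Outcome::Success { value: 42, task_id: 1 };
+ /// assert_eq!(outcome.success(), Some(42));
+ /// ```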
+ pub fn success(self) -> Option { + match self { + Self::Success { value, .. } | Self::SuccessWithSubtasks { value, .. } => Some(value), + _ => None, + } + } + + /// Consumes this `Outcome` and returns the input value if available, otherwise `None`. + pub fn try_into_input(self) -> Option { + match self { + Self::Failure { input, .. } + | Self::FailureWithSubtasks { input, .. } + | Self::Panic { input, .. } + | Self::PanicWithSubtasks { input, .. } => input, + Self::Unprocessed { input, .. } | Self::UnprocessedWithSubtasks { input, .. } => { + Some(input) + } + Self::Success { .. } | Self::SuccessWithSubtasks { .. } | Self::Missing { .. } => None, + #[cfg(feature = "local-batch")] + Self::WeightLimitExceeded { input, .. } => Some(input), + #[cfg(feature = "retry")] + Self::MaxRetriesAttempted { input, .. } => Some(input), + } + } + + /// Returns a reference to the wrapped error, if any. + pub fn error(&self) -> Option<&W::Error> { + match self { + Self::Failure { error, .. } | Self::FailureWithSubtasks { error, .. } => Some(error), + #[cfg(feature = "retry")] + Self::MaxRetriesAttempted { error, .. } => Some(error), + _ => None, + } + } + + /// Consumes this `Outcome` and depending on the variant: + /// * Returns the wrapped error if this is a `Failure` or `MaxRetriesAttempted`, + /// * Resumes unwinding if this is a `Panic` outcome, + /// * Otherwise returns `None`. + pub fn try_into_error(self) -> Option { + match self { + Self::Failure { error, .. } | Self::FailureWithSubtasks { error, .. } => Some(error), + Self::Panic { payload, .. } | Self::PanicWithSubtasks { payload, .. } => { + payload.resume() + } + Self::Success { .. } + | Self::SuccessWithSubtasks { .. } + | Self::Unprocessed { .. } + | Self::UnprocessedWithSubtasks { .. } + | Self::Missing { .. } => None, + #[cfg(feature = "local-batch")] + Self::WeightLimitExceeded { .. } => None, + #[cfg(feature = "retry")] + Self::MaxRetriesAttempted { error, ..
} => Some(error), + } + } +} + +impl PartialEq for Outcome { + fn eq(&self, other: &Self) -> bool { + mem::discriminant(self) == mem::discriminant(other) && self.task_id() == other.task_id() + } +} + +impl Eq for Outcome {} + +impl PartialOrd for Outcome { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Outcome { + fn cmp(&self, other: &Self) -> Ordering { + self.task_id().cmp(other.task_id()) + } +} + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::Outcome; + use crate::bee::stock::EchoWorker; + use crate::bee::{ApplyError, TaskMeta, WorkerResult}; + use crate::panic::Panic; + + type Worker = EchoWorker; + type WorkerOutcome = Outcome; + + #[test] + fn test_success() { + let outcome = WorkerOutcome::Success { + value: 42, + task_id: 1, + }; + assert_eq!(outcome.success(), Some(42)); + } + + #[test] + fn test_unwrap() { + let outcome = WorkerOutcome::Success { + value: 42, + task_id: 1, + }; + assert_eq!(outcome.unwrap(), 42); + } + + #[test] + fn test_success_on_error() { + let outcome = WorkerOutcome::Failure { + input: Some(42), + error: (), + task_id: 1, + }; + assert!(outcome.success().is_none()); + } + + #[test] + #[should_panic] + fn test_unwrap_panics_on_error() { + let outcome = WorkerOutcome::Failure { + input: Some(42), + error: (), + task_id: 1, + }; + let _ = outcome.unwrap(); + } + + #[test] + fn test_retry_with_subtasks_into_failure() { + let input = 1; + let task_id = 1; + let error = (); + let result = WorkerResult::::Err(ApplyError::Retryable { input, error }); + let task_meta = TaskMeta::new(task_id); + let subtask_ids = vec![2, 3, 4]; + let outcome = + WorkerOutcome::from_worker_result(result, task_meta, Some(subtask_ids.clone())); + let expected_outcome = WorkerOutcome::FailureWithSubtasks { + input: Some(input), + error, + task_id, + subtask_ids, + }; + assert_eq!(outcome, expected_outcome); + } + + #[test] + fn test_subtasks() { + let input = 1; + let task_id = 1; + let error = (); + let task_meta = TaskMeta::new(task_id); + let subtask_ids = vec![2, 3, 4]; + + let result = WorkerResult::::Err(ApplyError::Fatal { + input: Some(input), + error, + }); + let outcome = + WorkerOutcome::from_worker_result(result, task_meta.clone(), Some(subtask_ids.clone())); + let expected_outcome = WorkerOutcome::FailureWithSubtasks { + input: Some(1), + task_id: 1, + error: (), + subtask_ids: vec![2, 3, 4], + }; + assert_eq!(outcome, expected_outcome); + + let result = WorkerResult::::Err(ApplyError::Cancelled { input }); + let outcome = + WorkerOutcome::from_worker_result(result, task_meta.clone(), Some(subtask_ids.clone())); + let expected_outcome = WorkerOutcome::UnprocessedWithSubtasks { + input: 1, + task_id: 1, + subtask_ids: vec![2, 3, 4], + }; + assert_eq!(outcome, expected_outcome); + + let result = WorkerResult::::Err(ApplyError::Panic { + input: Some(input), + payload: Panic::new("panicked", None), + }); + let outcome = + WorkerOutcome::from_worker_result(result, task_meta.clone(), Some(subtask_ids.clone())); + let expected_outcome = WorkerOutcome::PanicWithSubtasks { + input: Some(1), + task_id: 1, + subtask_ids: vec![2, 3, 4], + payload: Panic::new("panicked", None), + }; + assert_eq!(outcome, expected_outcome); + } + + #[test] + fn test_try_into_input() { + let outcome = WorkerOutcome::Success { + value: 42, + task_id: 1, + }; + assert_eq!(outcome.try_into_input(), None); + + let outcome = WorkerOutcome::Failure { + input: None, + error: (), + task_id: 2, + }; + 
assert_eq!(outcome.try_into_input(), None); + + let outcome = WorkerOutcome::Failure { + input: Some(42), + error: (), + task_id: 2, + }; + assert_eq!(outcome.try_into_input(), Some(42)); + + let outcome = WorkerOutcome::Unprocessed { + input: 42, + task_id: 3, + }; + assert_eq!(outcome.try_into_input(), Some(42)); + + let outcome = WorkerOutcome::Missing { task_id: 4 }; + assert_eq!(outcome.try_into_input(), None); + + let outcome = WorkerOutcome::Panic { + input: None, + payload: Panic::try_call(None, || panic!()).unwrap_err(), + task_id: 5, + }; + assert_eq!(outcome.try_into_input(), None); + + let outcome = WorkerOutcome::Panic { + input: Some(42), + payload: Panic::try_call(None, || panic!()).unwrap_err(), + task_id: 5, + }; + assert_eq!(outcome.try_into_input(), Some(42)); + } + + #[test] + fn test_try_into_error() { + let outcome = WorkerOutcome::Success { + value: 42, + task_id: 1, + }; + assert_eq!(outcome.try_into_error(), None); + + let outcome = WorkerOutcome::Failure { + input: None, + error: (), + task_id: 2, + }; + assert_eq!(outcome.try_into_error(), Some(())); + + let outcome = WorkerOutcome::Failure { + input: Some(42), + error: (), + task_id: 2, + }; + assert_eq!(outcome.try_into_error(), Some(())); + + let outcome = WorkerOutcome::Unprocessed { + input: 42, + task_id: 3, + }; + assert_eq!(outcome.try_into_error(), None); + + let outcome = WorkerOutcome::Missing { task_id: 4 }; + assert_eq!(outcome.try_into_error(), None); + } + + #[test] + #[should_panic] + fn test_try_into_error_panic() { + WorkerOutcome::Panic { + input: None, + payload: Panic::try_call(None, || panic!()).unwrap_err(), + task_id: 5, + } + .try_into_error(); + } + + #[test] + fn test_eq() { + let outcome1 = WorkerOutcome::Success { + value: 42, + task_id: 1, + }; + let outcome2 = WorkerOutcome::Success { + value: 42, + task_id: 1, + }; + assert_eq!(outcome1, outcome2); + + let outcome3 = WorkerOutcome::Success { + value: 42, + task_id: 2, + }; + assert_ne!(outcome1, outcome3); + + let outcome4 = WorkerOutcome::Failure { + input: None, + error: (), + task_id: 1, + }; + assert_ne!(outcome1, outcome4); + } +} + +#[cfg(all(test, feature = "retry"))] +mod retry_tests { + use super::Outcome; + use crate::bee::TaskMeta; + use crate::bee::stock::EchoWorker; + + type Worker = EchoWorker; + type WorkerOutcome = Outcome; + + #[test] + fn test_try_into_input() { + let outcome = WorkerOutcome::MaxRetriesAttempted { + input: 42, + error: (), + task_id: 1, + }; + assert_eq!(outcome.try_into_input(), Some(42)); + } + + #[test] + fn test_try_into_error() { + let outcome = WorkerOutcome::MaxRetriesAttempted { + input: 42, + error: (), + task_id: 1, + }; + assert_eq!(outcome.try_into_error(), Some(())); + } + + #[test] + fn test_from_fatal() { + let input = 1; + let task_id = 1; + let error = (); + let outcome = WorkerOutcome::from_fatal(input, TaskMeta::new(task_id), error); + let expected_outcome = WorkerOutcome::Failure { + input: Some(input), + task_id, + error, + }; + assert_eq!(outcome, expected_outcome); + } +} diff --git a/src/hive/outcome/iter.rs b/src/hive/outcome/iter.rs index 54f9405..abad145 100644 --- a/src/hive/outcome/iter.rs +++ b/src/hive/outcome/iter.rs @@ -37,7 +37,7 @@ impl UnorderedOutcomeIterator { { let task_ids: BTreeSet<_> = task_ids.into_iter().collect(); Self { - inner: Box::new(inner.into_iter().take(task_ids.len())), + inner: Box::new(inner.into_iter()), task_ids, } } @@ -47,19 +47,27 @@ impl Iterator for UnorderedOutcomeIterator { type Item = Outcome; fn next(&mut self) -> Option { + if 
self.task_ids.is_empty() { + return None; + } loop { match self.inner.next() { Some(outcome) if self.task_ids.remove(outcome.task_id()) => break Some(outcome), - Some(_) => continue, // drop unrequested outcomes - None if !self.task_ids.is_empty() => { + None => { // convert extra task_ids to Missing outcomes break Some(Outcome::Missing { task_id: self.task_ids.pop_first().unwrap(), }); } - None => break None, + _ => continue, // drop unrequested outcomes } } + .inspect(|outcome| { + // if the originating task submitted subtasks, add their IDs to the queue + if let Some(subtask_ids) = outcome.subtask_ids() { + self.task_ids.extend(subtask_ids); + } + }) } } @@ -84,7 +92,7 @@ impl OrderedOutcomeIterator { { let task_ids: VecDeque = task_ids.into_iter().collect(); Self { - inner: Box::new(inner.into_iter().take(task_ids.len())), + inner: Box::new(inner.into_iter()), buf: HashMap::with_capacity(task_ids.len()), task_ids, } @@ -122,10 +130,19 @@ impl Iterator for OrderedOutcomeIterator { //if !self.buf.is_empty() { .. } break None; } + .inspect(|outcome| { + // if the originating task submitted subtasks, add their IDs to the queue + if let Some(subtask_ids) = outcome.subtask_ids() { + self.task_ids.extend(subtask_ids); + } + }) } } /// Extension trait for iterators over `Outcome`s. +/// +/// Note that, if your worker submits additional tasks to the `Hive`, their `Outcome`s will be +/// included in the iterator. pub trait OutcomeIteratorExt: IntoIterator> + Sized { /// Consumes this iterator and returns an unordered iterator over the `Outcome`s with the /// specified `task_ids`. @@ -233,6 +250,7 @@ pub trait OutcomeIteratorExt: IntoIterator> + Sized impl>> OutcomeIteratorExt for T {} #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::{OrderedOutcomeIterator, UnorderedOutcomeIterator}; use crate::bee::stock::EchoWorker; diff --git a/src/hive/outcome/mod.rs b/src/hive/outcome/mod.rs index bd16c81..83f8907 100644 --- a/src/hive/outcome/mod.rs +++ b/src/hive/outcome/mod.rs @@ -1,12 +1,109 @@ mod batch; +mod r#impl; mod iter; -#[allow(clippy::module_inception)] -mod outcome; +mod queue; mod store; -pub use batch::OutcomeBatch; -pub use iter::OutcomeIteratorExt; -pub use outcome::Outcome; -pub use store::OutcomeStore; +pub use self::batch::OutcomeBatch; +pub use self::iter::OutcomeIteratorExt; +pub use self::queue::OutcomeQueue; +pub use self::store::OutcomeStore; -pub(super) use store::sealed::{DerefOutcomes, OwnedOutcomes}; +pub(super) use self::store::{DerefOutcomes, OwnedOutcomes}; + +use crate::bee::{TaskId, Worker}; +use crate::panic::Panic; +use derive_more::Debug; + +/// The possible outcomes of a task execution. +/// +/// Each outcome includes the task ID of the task that produced it. Tasks that submitted +/// subtasks (via [`crate::bee::Context::submit`]) produce `Outcome` variants that have +/// `subtask_ids`. +/// +/// Note that `Outcome`s can only be compared or ordered with other `Outcome`s produced by the same +/// `Hive`, because comparison/ordering is completely based on the task ID. +#[derive(Debug)] +pub enum Outcome { + /// The task was executed successfully. + Success { + #[debug(skip)] + value: W::Output, + task_id: TaskId, + }, + /// The task was executed successfully, and it also submitted one or more subtasks to the + /// `Hive`. + SuccessWithSubtasks { + #[debug(skip)] + value: W::Output, + task_id: TaskId, + subtask_ids: Vec, + }, + /// The task failed with an error that was not retryable. The input value that caused the + /// failure is provided if possible. + Failure { + #[debug(skip)] + input: Option, + error: W::Error, + task_id: TaskId, + }, + /// The task failed with an error that was not retryable, but it submitted one or more subtasks + /// before failing. The input value that caused the failure is provided if possible. + FailureWithSubtasks { + #[debug(skip)] + input: Option, + error: W::Error, + task_id: TaskId, + subtask_ids: Vec, + }, + /// The task was not executed before the Hive was dropped, or processing of the task was + /// interrupted (e.g., by `suspend`ing the `Hive`). + Unprocessed { + #[debug(skip)] + input: W::Input, + task_id: TaskId, + }, + /// The task was not executed before the Hive was dropped, or processing of the task was + /// interrupted (e.g., by `suspend`ing the `Hive`), but it first submitted one or more subtasks. + UnprocessedWithSubtasks { + #[debug(skip)] + input: W::Input, + task_id: TaskId, + subtask_ids: Vec, + }, + /// The task with the given task_id was not found in the `Hive` or iterator from which it was + /// being requested. + Missing { task_id: TaskId }, + /// The task panicked. The input value that caused the panic is provided if possible. + Panic { + #[debug(skip)] + input: Option, + payload: Panic, + task_id: TaskId, + }, + /// The task panicked, but it submitted one or more subtasks before panicking. The input value + /// that caused the panic is provided if possible. + PanicWithSubtasks { + #[debug(skip)] + input: Option, + payload: Panic, + task_id: TaskId, + subtask_ids: Vec, + }, + /// The task's weight was larger than the configured limit for the `Hive`. + #[cfg(feature = "local-batch")] + WeightLimitExceeded { + #[debug(skip)] + input: W::Input, + weight: u32, + task_id: TaskId, + }, + /// The task failed after retrying the maximum number of times. + #[cfg(feature = "retry")] + MaxRetriesAttempted { + #[debug(skip)] + input: W::Input, + error: W::Error, + task_id: TaskId, + }, +} diff --git a/src/hive/outcome/outcome.rs b/src/hive/outcome/outcome.rs deleted file mode 100644 index be7c179..0000000 --- a/src/hive/outcome/outcome.rs +++ /dev/null @@ -1,341 +0,0 @@ -use crate::bee::{ApplyError, TaskId, Worker, WorkerResult}; -use crate::panic::Panic; -use std::cmp::Ordering; -use std::fmt::Debug; - -/// The possible outcomes of a task execution. -/// -/// Each outcome includes the task ID of the task that produced it. -/// -/// Note that `Outcome`s can only be compared or ordered with other `Outcome`s produced by the same -/// `Hive`, because comparison/ordering is completely based on the task ID. -#[derive(Debug)] -pub enum Outcome { - /// The task was executed successfully. - Success { value: W::Output, task_id: TaskId }, - /// The task failed with an error that was not retryable. The input value that caused the - failure is provided if possible. - Failure { - input: Option, - error: W::Error, - task_id: TaskId, - }, - /// The task was not executed before the Hive was dropped, or processing of the task was - /// interrupted (e.g., by `suspend`ing the `Hive`). - Unprocessed { input: W::Input, task_id: TaskId }, - /// The task with the given task_id was not found in the `Hive` or iterator from which it was - /// being requested. - Missing { task_id: TaskId }, - /// The task panicked. The input value that caused the panic is provided if possible. - Panic { - input: Option, - payload: Panic, - task_id: TaskId, - }, - /// The task failed after retrying the maximum number of times.
- #[cfg(feature = "retry")] - MaxRetriesAttempted { - input: W::Input, - error: W::Error, - task_id: TaskId, - }, -} - -impl Outcome { - /// Converts a worker `result` into an `Outcome` with the given task_id. - pub(in crate::hive) fn from_worker_result(result: WorkerResult, task_id: TaskId) -> Self { - match result { - Ok(value) => Self::Success { task_id, value }, - Err(ApplyError::Retryable { input, error }) => { - #[cfg(feature = "retry")] - { - Self::MaxRetriesAttempted { - input, - error, - task_id, - } - } - #[cfg(not(feature = "retry"))] - { - Self::Failure { - input: Some(input), - error, - task_id, - } - } - } - Err(ApplyError::Fatal { input, error }) => Self::Failure { - input, - error, - task_id, - }, - Err(ApplyError::Cancelled { input }) => Self::Unprocessed { input, task_id }, - Err(ApplyError::Panic { input, payload }) => Self::Panic { - input, - payload, - task_id, - }, - } - } - - /// Returns `true` if this is a `Success` outcome. - pub fn is_success(&self) -> bool { - matches!(self, Self::Success { .. }) - } - - /// Returns `true` if this outcome represents an unprocessed task input. - pub fn is_unprocessed(&self) -> bool { - matches!(self, Self::Unprocessed { .. }) - } - - /// Returns `true` if this outcome represents a task processing failure. - pub fn is_failure(&self) -> bool { - match self { - Self::Failure { .. } | Self::Panic { .. } => true, - #[cfg(feature = "retry")] - Self::MaxRetriesAttempted { .. } => true, - _ => false, - } - } - - /// Returns the task_id of the task that produced this outcome. - pub fn task_id(&self) -> &TaskId { - match self { - Self::Success { task_id, .. } - | Self::Failure { task_id, .. } - | Self::Unprocessed { task_id, .. } - | Self::Missing { task_id } - | Self::Panic { task_id, .. } => task_id, - #[cfg(feature = "retry")] - Self::MaxRetriesAttempted { task_id, .. } => task_id, - } - } - - /// Consumes this `Outcome` and returns the value if it is a `Success`, otherwise panics. - pub fn unwrap(self) -> W::Output { - self.success().expect("not a Success outcome") - } - - /// Consumes this `Outcome` and returns the output value if it is a `Success`, otherwise `None`. - pub fn success(self) -> Option { - match self { - Self::Success { value, .. } => Some(value), - _ => None, - } - } - - /// Consumes this `Outcome` and returns the input value if available, otherwise `None`. - pub fn try_into_input(self) -> Option { - match self { - Self::Success { .. } => None, - Self::Failure { input, .. } => input, - Self::Unprocessed { input, .. } => Some(input), - Self::Missing { .. } => None, - Self::Panic { input, .. } => input, - #[cfg(feature = "retry")] - Self::MaxRetriesAttempted { input, .. } => Some(input), - } - } - - /// Consumes this `Outcome` and depending on the variant: - /// * Returns the wrapped error if this is a `Failure` or `MaxRetriesAttempted`, - /// * Resumes unwinding if this is a `Panic` outcome, - /// * Otherwise returns `None`. - pub fn try_into_error(self) -> Option { - match self { - Self::Success { .. } => None, - Self::Failure { error, .. } => Some(error), - Self::Unprocessed { .. } => None, - Self::Missing { .. } => None, - Self::Panic { payload, .. } => payload.resume(), - #[cfg(feature = "retry")] - Self::MaxRetriesAttempted { error, .. } => Some(error), - } - } -} - -impl PartialEq for Outcome { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::Success { task_id: a, .. }, Self::Success { task_id: b, .. }) => a == b, - (Self::Failure { task_id: a, .. }, Self::Failure { task_id: b, .. 
}) => a == b, - (Self::Unprocessed { task_id: a, .. }, Self::Unprocessed { task_id: b, .. }) => a == b, - (Self::Missing { task_id: a }, Self::Missing { task_id: b }) => a == b, - (Self::Panic { task_id: a, .. }, Self::Panic { task_id: b, .. }) => a == b, - #[cfg(feature = "retry")] - ( - Self::MaxRetriesAttempted { task_id: a, .. }, - Self::MaxRetriesAttempted { task_id: b, .. }, - ) => a == b, - _ => false, - } - } -} - -impl Eq for Outcome {} - -impl PartialOrd for Outcome { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Outcome { - fn cmp(&self, other: &Self) -> Ordering { - self.task_id().cmp(other.task_id()) - } -} - -#[cfg(test)] -mod tests { - use super::Outcome; - use crate::bee::stock::EchoWorker; - use crate::panic::Panic; - - type Worker = EchoWorker; - type WorkerOutcome = Outcome; - - #[test] - fn test_try_into_input() { - let outcome = WorkerOutcome::Success { - value: 42, - task_id: 1, - }; - assert_eq!(outcome.try_into_input(), None); - - let outcome = WorkerOutcome::Failure { - input: None, - error: (), - task_id: 2, - }; - assert_eq!(outcome.try_into_input(), None); - - let outcome = WorkerOutcome::Failure { - input: Some(42), - error: (), - task_id: 2, - }; - assert_eq!(outcome.try_into_input(), Some(42)); - - let outcome = WorkerOutcome::Unprocessed { - input: 42, - task_id: 3, - }; - assert_eq!(outcome.try_into_input(), Some(42)); - - let outcome = WorkerOutcome::Missing { task_id: 4 }; - assert_eq!(outcome.try_into_input(), None); - - let outcome = WorkerOutcome::Panic { - input: None, - payload: Panic::try_call(None, || panic!()).unwrap_err(), - task_id: 5, - }; - assert_eq!(outcome.try_into_input(), None); - - let outcome = WorkerOutcome::Panic { - input: Some(42), - payload: Panic::try_call(None, || panic!()).unwrap_err(), - task_id: 5, - }; - assert_eq!(outcome.try_into_input(), Some(42)); - } - - #[test] - fn test_try_into_error() { - let outcome = WorkerOutcome::Success { - value: 42, - task_id: 1, - }; - assert_eq!(outcome.try_into_error(), None); - - let outcome = WorkerOutcome::Failure { - input: None, - error: (), - task_id: 2, - }; - assert_eq!(outcome.try_into_error(), Some(())); - - let outcome = WorkerOutcome::Failure { - input: Some(42), - error: (), - task_id: 2, - }; - assert_eq!(outcome.try_into_error(), Some(())); - - let outcome = WorkerOutcome::Unprocessed { - input: 42, - task_id: 3, - }; - assert_eq!(outcome.try_into_error(), None); - - let outcome = WorkerOutcome::Missing { task_id: 4 }; - assert_eq!(outcome.try_into_error(), None); - } - - #[test] - #[should_panic] - fn test_try_into_error_panic() { - WorkerOutcome::Panic { - input: None, - payload: Panic::try_call(None, || panic!()).unwrap_err(), - task_id: 5, - } - .try_into_error(); - } - - #[test] - fn test_eq() { - let outcome1 = WorkerOutcome::Success { - value: 42, - task_id: 1, - }; - let outcome2 = WorkerOutcome::Success { - value: 42, - task_id: 1, - }; - assert_eq!(outcome1, outcome2); - - let outcome3 = WorkerOutcome::Success { - value: 42, - task_id: 2, - }; - assert_ne!(outcome1, outcome3); - - let outcome4 = WorkerOutcome::Failure { - input: None, - error: (), - task_id: 1, - }; - assert_ne!(outcome1, outcome4); - } -} - -#[cfg(all(test, feature = "retry"))] -mod retry_tests { - use super::Outcome; - use crate::bee::stock::EchoWorker; - - type Worker = EchoWorker; - type WorkerOutcome = Outcome; - - #[test] - fn test_try_into_input() { - let outcome = WorkerOutcome::MaxRetriesAttempted { - input: 42, - error: (), - task_id: 1, - 
}; - assert_eq!(outcome.try_into_input(), Some(42)); - } - - #[test] - fn test_try_into_error() { - let outcome = WorkerOutcome::MaxRetriesAttempted { - input: 42, - error: (), - task_id: 1, - }; - assert_eq!(outcome.try_into_error(), Some(())); - } -} diff --git a/src/hive/outcome/queue.rs b/src/hive/outcome/queue.rs new file mode 100644 index 0000000..9ed7b70 --- /dev/null +++ b/src/hive/outcome/queue.rs @@ -0,0 +1,118 @@ +use super::{DerefOutcomes, Outcome}; +use crate::bee::{TaskId, Worker}; +use crossbeam_queue::SegQueue; +use parking_lot::Mutex; +use std::collections::HashMap; +use std::ops::{Deref, DerefMut}; + +/// Data structure that supports queuing `Outcomes` from multiple threads (without locking) and +/// fetching from a single thread (which requires draining the queue into a map that is behind a +/// mutex). +/// +/// TODO: test vs using a +/// [`SkipMap`](https://docs.rs/crossbeam-skiplist/latest/crossbeam_skiplist/struct.SkipMap.html) or +/// [`DashMap`](https://docs.rs/dashmap/latest/dashmap/struct.DashMap.html) +pub struct OutcomeQueue { + queue: SegQueue>, + outcomes: Mutex>>, +} + +impl OutcomeQueue { + /// Adds an `outcome` to the queue. + pub fn push(&self, outcome: Outcome) { + self.queue.push(outcome); + } + + /// Flushes the queue into the map of outcomes and returns a mutable reference to the map. + pub fn get_mut(&self) -> impl DerefMut>> { + let mut outcomes = self.outcomes.lock(); + drain_into(&self.queue, &mut outcomes); + outcomes + } + + /// Consumes this `OutcomeQueue`, drains the queue, and returns the outcomes as a map. + pub fn into_inner(self) -> HashMap> { + let mut outcomes = self.outcomes.into_inner(); + drain_into(&self.queue, &mut outcomes); + outcomes + } +} + +#[inline] +fn drain_into(queue: &SegQueue>, outcomes: &mut HashMap>) { + while let Some(outcome) = queue.pop() { + outcomes.insert(*outcome.task_id(), outcome); + } +} + +impl Default for OutcomeQueue { + fn default() -> Self { + Self { + queue: Default::default(), + outcomes: Default::default(), + } + } +} + +impl DerefOutcomes for OutcomeQueue { + fn outcomes_deref(&self) -> impl Deref>> { + self.get_mut() + } + + fn outcomes_deref_mut(&mut self) -> impl DerefMut>> { + self.get_mut() + } +} + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::*; + use crate::bee::stock::EchoWorker; + use crate::hive::OutcomeStore; + + #[test] + fn test_works() { + let queue = OutcomeQueue::>::default(); + queue.push(Outcome::Success { + value: 42, + task_id: 1, + }); + queue.push(Outcome::Unprocessed { + input: 43, + task_id: 2, + }); + queue.push(Outcome::Failure { + input: Some(44), + error: (), + task_id: 3, + }); + assert_eq!(queue.count(), (1, 1, 1)); + queue.push(Outcome::Missing { task_id: 4 }); + let outcomes = queue.into_inner(); + assert_eq!(outcomes.len(), 4); + assert_eq!( + outcomes[&1], + Outcome::Success { + value: 42, + task_id: 1 + } + ); + assert_eq!( + outcomes[&2], + Outcome::Unprocessed { + input: 43, + task_id: 2 + } + ); + assert_eq!( + outcomes[&3], + Outcome::Failure { + input: Some(44), + error: (), + task_id: 3 + } + ); + assert_eq!(outcomes[&4], Outcome::Missing { task_id: 4 }) + } +} diff --git a/src/hive/outcome/store.rs b/src/hive/outcome/store.rs index 0a22b7c..45b5733 100644 --- a/src/hive/outcome/store.rs +++ b/src/hive/outcome/store.rs @@ -1,30 +1,27 @@ -use super::{DerefOutcomes, Outcome}; +use super::Outcome; use crate::bee::{TaskId, Worker}; +use std::{ + collections::HashMap, + ops::{Deref, DerefMut}, +}; + +/// Trait implemented by 
structs that provide temporary access (both read-only and mutable) to a +/// reference to a map of outcomes. +pub trait DerefOutcomes { + /// Returns a read-only reference to a map of task ID to `Outcome`. + fn outcomes_deref(&self) -> impl Deref>>; + + /// Returns a mutable reference to a map of task ID to `Outcome`. + fn outcomes_deref_mut(&mut self) -> impl DerefMut>>; +} -/// Traits with methods that should only be accessed internally by public traits. -pub mod sealed { - use crate::bee::{TaskId, Worker}; - use crate::hive::Outcome; - use std::{ - collections::HashMap, - ops::{Deref, DerefMut}, - }; - - pub trait DerefOutcomes { - /// Returns a read-only reference to a map of task task_id to `Outcome`. - fn outcomes_deref(&self) -> impl Deref>>; - - /// Returns a mutable reference to a map of task task_id to `Outcome`. - fn outcomes_deref_mut(&mut self) -> impl DerefMut>>; - } - - pub trait OwnedOutcomes: Sized { - /// Returns an owned map of task task_id to `Outcome`. fn outcomes(self) -> HashMap>; +/// Trait implemented by structs that provide (thread-unsafe) access to an owned outcome map. +pub trait OwnedOutcomes: Sized { + /// Returns an owned map of task ID to `Outcome`. + fn outcomes(self) -> HashMap>; - /// Returns a read-only reference to a map of task task_id to `Outcome`. - fn outcomes_ref(&self) -> &HashMap>; - } + /// Returns a read-only reference to a map of task ID to `Outcome`. + fn outcomes_ref(&self) -> &HashMap>; } /// Trait implemented by structs that store `Outcome`s (`Hive`, `Husk`, and `OutcomeBatch`). @@ -32,7 +29,7 @@ pub mod sealed { /// The first group of methods provided by this trait only require dereferencing the underlying map, /// while the second group of methods require the ability to borrow or take ownership of the /// underlying map (and thus, are not in scope for `Hive`). -pub trait OutcomeStore: sealed::DerefOutcomes { +pub trait OutcomeStore: DerefOutcomes { fn len(&self) -> usize { self.outcomes_deref().len() } @@ -59,7 +56,9 @@ pub trait OutcomeStore: sealed::DerefOutcomes { fn assert_empty(&self, allow_successes: bool) { let (unprocessed, successes, failures) = self.count(); if !allow_successes && successes > 0 { - panic!("{unprocessed} unprocessed inputs, {successes} successes, and {failures} failed tasks found"); + panic!( + "{unprocessed} unprocessed inputs, {successes} successes, and {failures} failed tasks found" + ); } else if unprocessed > 0 || failures > 0 { panic!("{unprocessed} unprocessed inputs and {failures} failed tasks found"); } @@ -217,7 +216,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// Returns the stored `Outcome` associated with the given task_id, if any. fn get(&self, task_id: TaskId) -> Option<&Outcome> where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { self.outcomes_ref().get(&task_id) } @@ -225,7 +224,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// Consumes this store and returns an iterator over the outcomes in task_id order. fn into_iter(self) -> impl Iterator> where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { self.outcomes().into_values() } @@ -233,7 +232,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// Returns the successes as a `Vec` if there are no errors, otherwise panics. fn unwrap(self) -> Vec where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { assert!( !(self.has_failures() || self.has_unprocessed()), @@ -252,7 +251,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// they cause this method to panic.
fn ok_or_unwrap_errors(self, drop_unprocessed: bool) -> Result, Vec> where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { assert!( drop_unprocessed || !self.has_unprocessed(), @@ -274,7 +273,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// inputs are returned in task_id order, otherwise they are unordered. fn into_unprocessed(self, ordered: bool) -> Vec where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { let values = self .outcomes() @@ -300,7 +299,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// that were queued but not yet processed when the `Hive` was dropped. fn iter_unprocessed(&self) -> impl Iterator where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { self.outcomes_ref() .values() @@ -314,7 +313,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// that were successfully processed but not sent to any output channel. fn iter_successes(&self) -> impl Iterator where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { self.outcomes_ref() .values() @@ -328,7 +327,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { /// that were successfully processed but not sent to any output channel. fn iter_failures(&self) -> impl Iterator> where - Self: sealed::OwnedOutcomes, + Self: OwnedOutcomes, { self.outcomes_ref() .values() @@ -340,6 +339,7 @@ pub trait OutcomeStore: sealed::DerefOutcomes { impl> OutcomeStore for D {} #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::OutcomeStore; use crate::bee::{Context, Worker, WorkerResult}; @@ -354,7 +354,7 @@ mod tests { type Output = u8; type Error = (); - fn apply(&mut self, i: Self::Input, _: &Context) -> WorkerResult { + fn apply(&mut self, i: Self::Input, _: &Context) -> WorkerResult { Ok(i) } } @@ -529,8 +529,8 @@ mod tests { #[cfg(all(test, feature = "retry"))] mod retry_tests { - use super::tests::TestWorker; use super::OutcomeStore; + use super::tests::TestWorker; use crate::hive::{Outcome, OutcomeBatch}; use crate::panic::Panic; diff --git a/src/hive/scoped/hive.rs b/src/hive/scoped/hive.rs deleted file mode 100644 index 58232aa..0000000 --- a/src/hive/scoped/hive.rs +++ /dev/null @@ -1,152 +0,0 @@ -use crate::{ApplyError, Panic}; -use parking_lot::Mutex; -use std::{ - fmt::Debug, - sync::{mpsc, Arc}, - thread, -}; - -pub type WorkerError = ApplyError<::Input, ::Error>; -pub type WorkerResult = Result<::Output, WorkerError>; - -pub trait Worker: Debug + Sized { - type Input: Send; - type Output: Send; - type Error: Send + Debug; - - fn apply(&mut self, _: Self::Input, _: &Context) -> WorkerResult; -} - -pub trait Queen: Send + Sync { - type Kind: Worker; - - fn create(&mut self) -> Self::Kind; -} - -pub struct Hive> { - queen: Mutex, - num_threads: usize, -} - -#[derive(thiserror::Error, Debug)] -pub enum HiveError { - #[error("Task failed")] - Failed(W::Error), - #[error("Task retried the maximum number of times")] - MaxRetriesAttempted(W::Error), - #[error("Task input was not processed")] - Unprocessed(W::Input), - #[error("Task panicked")] - Panic(Panic), -} - -pub type HiveResult = Result>; -pub type TaskResult = HiveResult<::Output, W>; - -#[derive(Debug, PartialEq, Eq)] -pub enum Outcome { - /// The task was executed successfully. - Success { value: W::Output, task_id: TaskId }, - /// The task failed with an error that was not retryable. - Failure { error: W::Error, task_id: TaskId }, - /// The task failed after retrying the maximum number of times. 
- MaxRetriesAttempted { error: W::Error, task_id: TaskId }, - /// The task was not executed before the Hive was closed. - Unprocessed { value: W::Input, task_id: TaskId }, - /// The task panicked. - Panic { - payload: Panic, - task_id: TaskId, - }, -} - -impl Outcome { - /// Returns the ID of the task that produced this outcome. - pub fn task_id(&self) -> TaskId { - match self { - Outcome::Success { task_id, .. } - | Outcome::Failure { task_id, .. } - | Outcome::MaxRetriesAttempted { task_id, .. } - | Outcome::Unprocessed { task_id, .. } - | Outcome::Panic { task_id, .. } => *task_id, - } - } - - /// Creates a new `Outcome` from a `Panic`. - pub fn from_panic(payload: Panic, task_id: TaskId) -> Outcome { - Outcome::Panic { payload, task_id } - } - - pub(crate) fn from_panic_result( - result: Result, Panic>, - task_id: TaskId, - ) -> Outcome { - match result { - Ok(result) => Outcome::from_worker_result(result, task_id), - Err(panic) => Outcome::from_panic(panic, task_id), - } - } - - pub(crate) fn from_worker_result(result: WorkerResult, task_id: TaskId) -> Outcome { - match result { - Ok(value) => Self::Success { task_id, value }, - Err(ApplyError::Cancelled { input } | ApplyError::Retryable { input, .. }) => { - Self::Unprocessed { - value: input, - task_id, - } - } - Err(ApplyError::Fatal(error)) => Self::Failure { error, task_id }, - } - } -} - -/// Context for a task. -#[derive(Debug, Default)] -pub struct Context { - task_id: TaskId, - attempt: u32, -} - -impl Context { - fn new(task_id: TaskId) -> Self { - Self { - task_id, - attempt: 0, - } - } -} - -impl> Hive { - pub fn map(&self, inputs: impl IntoIterator) { - //-> impl Iterator> { - let (task_tx, task_rx) = mpsc::channel(); - let task_rx = Arc::new(Mutex::new(task_rx)); - let (outcome_tx, outcome_rx) = crate::outcome_channel(); - thread::scope(|scope| { - let join_handles = (0..self.num_threads) - .map(|task_id| { - let task_rx = task_rx.clone(); - let outcome_tx = outcome_tx.clone(); - scope.spawn(move || loop { - let mut worker = self.queen.lock().create(); - if let Ok(input) = task_rx.lock().recv() { - let ctx = Context::new(task_id); - let result: Result, Panic> = - Panic::try_call(None, || worker.apply(input, &ctx)); - let outcome: Outcome = Outcome::from_panic_result(result, task_id); - outcome_tx.send(outcome); - } else { - break; - } - }) - }) - .collect::>(); - }); - // let num_tasks = inputs - // .into_iter() - // .map(|input| self.apply_send(job, tx.clone())) - // .count(); - // rx.into_iter().take(num_tasks).map(Outcome::into_result) - } -} diff --git a/src/hive/scoped/mod.rs b/src/hive/scoped/mod.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/hive/sentinel.rs b/src/hive/sentinel.rs new file mode 100644 index 0000000..ce09878 --- /dev/null +++ b/src/hive/sentinel.rs @@ -0,0 +1,71 @@ +use super::{Shared, TaskQueues}; +use crate::bee::{Queen, Worker}; +use std::io::Error as SpawnError; +use std::sync::Arc; +use std::thread::{self, JoinHandle}; + +/// Sentinel for a worker thread. Until the sentinel is cancelled, it will respawn the worker +/// thread if it panics. 
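The doc comment above describes the whole trick: the respawn decision lives in `Drop`, which runs whether the worker's closure returns normally or unwinds from a panic. A standalone sketch of the same Drop-based pattern, using only `std` (the real `Sentinel`, defined next, additionally carries the hive's shared state and bookkeeps the in-flight task before respawning):

```rust
use std::thread::{self, JoinHandle};

// If the guard is dropped while the thread is unwinding (i.e., the task
// panicked), it restarts the worker; a clean exit disarms it via `cancel`.
struct RespawnGuard<F: Fn() -> JoinHandle<()> + Send + 'static> {
    active: bool,
    respawn: F,
}

impl<F: Fn() -> JoinHandle<()> + Send + 'static> RespawnGuard<F> {
    fn cancel(mut self) {
        self.active = false;
    }
}

impl<F: Fn() -> JoinHandle<()> + Send + 'static> Drop for RespawnGuard<F> {
    fn drop(&mut self) {
        if self.active && thread::panicking() {
            // the previous JoinHandle is unusable at this point; just respawn
            let _ = (self.respawn)();
        }
    }
}

fn spawn_worker() -> JoinHandle<()> {
    thread::spawn(|| {
        let guard = RespawnGuard { active: true, respawn: spawn_worker };
        // ... run tasks here; a panic triggers the guard's Drop ...
        guard.cancel(); // normal shutdown: do not respawn
    })
}

fn main() {
    spawn_worker().join().unwrap();
}
```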
+pub struct Sentinel<W, Q, T, F>
+where
+    W: Worker,
+    Q: Queen<Kind = W>,
+    T: TaskQueues<W>,
+    F: Fn(usize, &Arc<Shared<W, Q, T>>) -> Result<JoinHandle<()>, SpawnError> + 'static,
+{
+    /// The index of the worker thread
+    thread_index: usize,
+    /// The shared data to pass to the new worker thread when respawning
+    shared: Arc<Shared<W, Q, T>>,
+    /// Whether sentinel is active
+    active: bool,
+    /// The function that will be called to respawn the worker thread
+    respawn_fn: F,
+}
+
+impl<W, Q, T, F> Sentinel<W, Q, T, F>
+where
+    W: Worker,
+    Q: Queen<Kind = W>,
+    T: TaskQueues<W>,
+    F: Fn(usize, &Arc<Shared<W, Q, T>>) -> Result<JoinHandle<()>, SpawnError> + 'static,
+{
+    pub fn new(thread_index: usize, shared: Arc<Shared<W, Q, T>>, respawn_fn: F) -> Self {
+        Self {
+            thread_index,
+            shared,
+            active: true,
+            respawn_fn,
+        }
+    }
+
+    /// Cancel and destroy this sentinel.
+    pub fn cancel(mut self) {
+        self.active = false;
+    }
+}
+
+impl<W, Q, T, F> Drop for Sentinel<W, Q, T, F>
+where
+    W: Worker,
+    Q: Queen<Kind = W>,
+    T: TaskQueues<W>,
+    F: Fn(usize, &Arc<Shared<W, Q, T>>) -> Result<JoinHandle<()>, SpawnError> + 'static,
+{
+    fn drop(&mut self) {
+        if self.active {
+            // if the sentinel is active, that means the thread panicked during task execution, so
+            // we have to finish the task here before respawning
+            self.shared.finish_task(thread::panicking());
+            // only respawn if the sentinel is active and the hive has not been poisoned
+            if !self.shared.is_poisoned() {
+                // can't do anything with the previous JoinHandle
+                let _ = self
+                    .shared
+                    .respawn_thread(self.thread_index, |thread_index| {
+                        (self.respawn_fn)(thread_index, &self.shared)
+                    });
+            }
+        }
+    }
+}
diff --git a/src/hive/shared.rs b/src/hive/shared.rs
deleted file mode 100644
index 0eb7fff..0000000
--- a/src/hive/shared.rs
+++ /dev/null
@@ -1,816 +0,0 @@
-use super::counter::CounterError;
-use super::{Config, Outcome, OutcomeSender, Shared, SpawnError, Task, TaskReceiver};
-use crate::atomic::{Atomic, AtomicInt, AtomicUsize};
-use crate::bee::{Context, Queen, TaskId, Worker};
-use crate::channel::SenderExt;
-use parking_lot::Mutex;
-use std::collections::HashMap;
-use std::ops::DerefMut;
-use std::sync::mpsc::RecvTimeoutError;
-use std::thread::{Builder, JoinHandle};
-use std::time::Duration;
-use std::{fmt, iter, mem};
-
-impl> Shared {
-    /// Creates a new `Shared` instance with the given configuration, queen, and task receiver,
-    /// and all other fields set to their default values.
-    pub fn new(config: Config, queen: Q, task_rx: TaskReceiver) -> Self {
-        Shared {
-            config,
-            queen: Mutex::new(queen),
-            task_rx: Mutex::new(task_rx),
-            spawn_results: Default::default(),
-            num_tasks: Default::default(),
-            next_task_id: Default::default(),
-            num_panics: Default::default(),
-            num_referrers: AtomicUsize::new(1),
-            poisoned: Default::default(),
-            suspended: Default::default(),
-            resume_gate: Default::default(),
-            join_gate: Default::default(),
-            outcomes: Default::default(),
-            #[cfg(feature = "batching")]
-            local_queues: Default::default(),
-            #[cfg(feature = "retry")]
-            retry_queues: Default::default(),
-        }
-    }
-
-    /// Returns a `Builder` for creating a new thread in the `Hive`.
-    pub fn thread_builder(&self) -> Builder {
-        let mut builder = Builder::new();
-        if let Some(ref name) = self.config.thread_name.get() {
-            builder = builder.name(name.clone());
-        }
-        if let Some(ref stack_size) = self.config.thread_stack_size.get() {
-            builder = builder.stack_size(stack_size.to_owned());
-        }
-        builder
-    }
-
-    /// Spawns the initial set of `self.config.num_threads` worker threads using the provided
-    /// spawning function. Returns the number of worker threads that were successfully started.
- pub fn init_threads(&self, f: F) -> usize - where - F: Fn(usize) -> Result, SpawnError>, - { - let num_threads = self.config.num_threads.get_or_default(); - if num_threads == 0 { - return 0; - } - let mut spawn_results = self.spawn_results.lock(); - self.spawn_threads(0, num_threads, f, &mut spawn_results) - } - - /// Increases the maximum number of threads allowed in the `Hive` by `num_threads`, and - /// attempts to spawn threads with indices in `range = cur_index..cur_index + num_threads` - /// using the provided spawning function. The results are stored in `self.spawn_results[range]`. - /// Returns the number of new worker threads that were successfully started. - pub fn grow_threads(&self, num_threads: usize, f: F) -> usize - where - F: Fn(usize) -> Result, SpawnError>, - { - let mut spawn_results = self.spawn_results.lock(); - let start_index = self.config.num_threads.add(num_threads).unwrap(); - self.spawn_threads(start_index, num_threads, f, &mut spawn_results) - } - - fn spawn_threads( - &self, - start_index: usize, - num_threads: usize, - f: F, - spawn_results: &mut Vec, SpawnError>>, - ) -> usize - where - F: Fn(usize) -> Result, SpawnError>, - { - assert_eq!(spawn_results.len(), start_index); - let end_index = start_index + num_threads; - // if worker threads need a local queue, initialize them before spawning - #[cfg(feature = "batching")] - self.init_local_queues(start_index, end_index); - #[cfg(feature = "retry")] - self.init_retry_queues(start_index, end_index); - // spawn the worker threads and return the results - let results: Vec<_> = (start_index..end_index).map(f).collect(); - spawn_results.reserve(num_threads); - results - .into_iter() - .map(|result| { - let started = result.is_ok(); - spawn_results.push(result); - started - }) - .filter(|started| *started) - .count() - } - - /// Attempts to spawn a thread to replace the one at the specified `index` using the provided - /// spawning function. The result is stored in `self.spawn_results[index]`. Returns the - /// spawn result for the previous thread at the same index. - pub fn respawn_thread(&self, index: usize, f: F) -> Result, SpawnError> - where - F: FnOnce(usize) -> Result, SpawnError>, - { - let result = f(index); - let mut spawn_results = self.spawn_results.lock(); - assert!(spawn_results.len() > index); - // Note: we do *not* want to wait on the `JoinHandle` for the previous thread as it may - // still be processing a task - std::mem::replace(&mut spawn_results[index], result) - } - - /// Attempts to respawn any threads that are currently dead using the provided spawning - /// function. Returns the number of threads that were successfully respawned. - //#[cfg_attr(coverage(off)] // no idea how to test this - pub fn respawn_dead_threads(&self, f: F) -> usize - where - F: Fn(usize) -> Result, SpawnError>, - { - self.spawn_results - .lock() - .iter_mut() - .enumerate() - .filter(|(_, result)| result.is_err()) - .map(|(i, result)| { - let new_result = f(i); - let started = new_result.is_ok(); - *result = new_result; - started - }) - .filter(|started| *started) - .count() - } - - /// Returns a new `Worker` from the queen, or an error if a `Worker` could not be created. - pub fn create_worker(&self) -> Q::Kind { - self.queen.lock().create() - } - - /// Increments the number of queued tasks. Returns a new `Task` with the provided input and - /// `outcome_tx` and the next ID. 
- pub fn prepare_task(&self, input: W::Input, outcome_tx: Option>) -> Task { - self.num_tasks - .increment_left(1) - .expect("overflowed queued task counter"); - let task_id = self.next_task_id.add(1); - let ctx = Context::new(task_id, self.suspended.clone()); - Task::new(input, ctx, outcome_tx) - } - - /// Increments the number of queued tasks by the number of provided inputs. Returns an iterator - /// over `Task`s created from the provided inputs, `outcome_tx`s, and sequential task_ids. - pub fn prepare_batch<'a, T: Iterator + 'a>( - &'a self, - min_size: usize, - inputs: T, - outcome_tx: Option>, - ) -> impl Iterator> + 'a { - self.num_tasks - .increment_left(min_size as u64) - .expect("overflowed queued task counter"); - let task_id_start = self.next_task_id.add(min_size); - let task_id_end = task_id_start + min_size; - inputs - .map(Some) - .chain(iter::repeat_with(|| None)) - .zip( - (task_id_start..task_id_end) - .map(Some) - .chain(iter::repeat_with(|| None)), - ) - .map_while(move |pair| match pair { - (Some(input), Some(task_id)) => Some(Task { - input, - ctx: Context::new(task_id, self.suspended.clone()), - //attempt: 0, - outcome_tx: outcome_tx.clone(), - }), - (Some(input), None) => Some(self.prepare_task(input, outcome_tx.clone())), - (None, Some(_)) => panic!("batch contained fewer than {min_size} items"), - (None, None) => None, - }) - } - - /// Sends an outcome to `outcome_tx`, or stores it in the `Hive` shared data if there is no - /// sender, or if the send fails. - pub fn send_or_store_outcome(&self, outcome: Outcome, outcome_tx: Option>) { - if let Some(outcome) = if let Some(tx) = outcome_tx { - tx.try_send_msg(outcome) - } else { - Some(outcome) - } { - self.add_outcome(outcome) - } - } - - pub fn abandon_task(&self, task: Task) { - let (outcome, outcome_tx) = task.into_unprocessed(); - self.send_or_store_outcome(outcome, outcome_tx); - // decrement the queued counter since it was incremented but the task was never queued - let _ = self.num_tasks.decrement_left(1); - self.no_work_notify_all(); - } - - /// Converts each `Task` in the iterator into `Outcome::Unprocessed` and attempts to send it - /// to its `OutcomeSender` if there is one, or stores it if there is no sender or the send - /// fails. Returns a vector of task_ids of the tasks. - pub fn abandon_batch(&self, tasks: I) -> Vec - where - I: Iterator>, - { - // don't unlock outcomes unless we have to - let mut outcomes = Option::None; - let task_ids: Vec<_> = tasks - .map(|task| { - let task_id = task.id(); - let (outcome, outcome_tx) = task.into_unprocessed(); - if let Some(outcome) = if let Some(tx) = outcome_tx { - tx.try_send_msg(outcome) - } else { - Some(outcome) - } { - outcomes - .get_or_insert_with(|| self.outcomes.lock()) - .insert(task_id, outcome); - } - task_id - }) - .collect(); - // decrement the queued counter since it was incremented but the tasks were never queued - let _ = self.num_tasks.decrement_left(task_ids.len() as u64); - self.no_work_notify_all(); - task_ids - } - - /// Called by a worker thread after completing a task. Notifies any thread that has `join`ed - /// the `Hive` if there is no more work to be done. 
- #[inline] - pub fn finish_task(&self, panicking: bool) { - self.finish_tasks(1, panicking); - } - - pub fn finish_tasks(&self, n: u64, panicking: bool) { - self.num_tasks - .decrement_right(n) - .expect("active task counter was smaller than expected"); - if panicking { - self.num_panics.add(1); - } - self.no_work_notify_all(); - } - - /// Returns a tuple with the number of (queued, active) tasks. - #[inline] - pub fn num_tasks(&self) -> (u64, u64) { - self.num_tasks.get() - } - - /// Returns `true` if the hive has not been poisoned and there are either active tasks or there - /// are queued tasks and the cancelled flag hasn't been set. - #[inline] - pub fn has_work(&self) -> bool { - !self.is_poisoned() && { - let (queued, active) = self.num_tasks(); - active > 0 || (!self.is_suspended() && queued > 0) - } - } - - /// Blocks the current thread until all active tasks have been processed. Also waits until all - /// queued tasks have been processed unless the suspended flag has been set. - pub fn wait_on_done(&self) { - self.join_gate.wait_while(|| self.has_work()); - } - - /// Notify all observers joining this hive when all tasks have been completed. - pub fn no_work_notify_all(&self) { - if !self.has_work() { - self.join_gate.notify_all(); - } - } - - /// Returns the number of `Hive`s holding a reference to this shared data. - pub fn num_referrers(&self) -> usize { - self.num_referrers.get() - } - - /// Increments the number of referrers and returns the previous value. - pub fn referrer_is_cloning(&self) -> usize { - self.num_referrers.add(1) - } - - /// Decrements the number of referrers and returns the previous value. - pub fn referrer_is_dropping(&self) -> usize { - self.num_referrers.sub(1) - } - - /// Sets the `poisoned` flag to `true`. Converts all queued tasks to `Outcome::Unprocessed` - /// and stores them in `outcomes`. Also automatically resumes the hive if it is suspendend, - /// which enables blocked worker threads to terminate. - pub fn poison(&self) { - self.poisoned.set(true); - self.drain_tasks_into_unprocessed(); - self.set_suspended(false); - } - - /// Returns `true` if the hive has been poisoned. A poisoned have may accept new tasks but will - /// never process them. Unprocessed tasks can be retrieved by calling `take_outcomes` or - /// `try_into_husk`. - #[inline] - pub fn is_poisoned(&self) -> bool { - self.poisoned.get() - } - - /// Sets the `suspended` flag. If `true`, worker threads may terminate early, and no new tasks - /// will be started until this flag is set to `false`. Returns `true` if the value was changed. - pub fn set_suspended(&self, suspended: bool) -> bool { - if self.suspended.set(suspended) == suspended { - false - } else { - if !suspended { - self.resume_gate.notify_all(); - } - true - } - } - - /// Returns `true` if the `suspended` flag has been set. - #[inline] - pub fn is_suspended(&self) -> bool { - self.suspended.get() - } - - /// Returns a mutable reference to the retained task outcomes. - pub fn outcomes(&self) -> impl DerefMut>> + '_ { - self.outcomes.lock() - } - - /// Adds a new outcome to the retained task outcomes. - pub fn add_outcome(&self, outcome: Outcome) { - let mut lock = self.outcomes.lock(); - lock.insert(*outcome.task_id(), outcome); - } - - /// Removes and returns all retained task outcomes. - pub fn take_outcomes(&self) -> HashMap> { - let mut lock = self.outcomes.lock(); - mem::take(&mut *lock) - } - - /// Removes and returns all retained `Unprocessed` outcomes. 
- pub fn take_unprocessed(&self) -> Vec> { - let mut outcomes = self.outcomes.lock(); - let unprocessed_task_ids: Vec<_> = outcomes - .keys() - .cloned() - .filter(|task_id| matches!(outcomes.get(task_id), Some(Outcome::Unprocessed { .. }))) - .collect(); - unprocessed_task_ids - .into_iter() - .map(|task_id| outcomes.remove(&task_id).unwrap()) - .collect() - } -} - -impl> fmt::Debug for Shared { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let (queued, active) = self.num_tasks(); - f.debug_struct("Shared") - .field("name", &self.config.thread_name) - .field("num_threads", &self.config.num_threads) - .field("num_tasks_queued", &queued) - .field("num_tasks_active", &active) - .finish() - } -} - -#[cfg(feature = "affinity")] -mod affinity { - use crate::bee::{Queen, Worker}; - use crate::hive::cores::{Core, Cores}; - use crate::hive::Shared; - - impl> Shared { - /// Adds cores to which worker threads may be pinned. - pub fn add_core_affinity(&self, new_cores: Cores) { - let _ = self.config.affinity.try_update_with(|mut affinity| { - let updated = affinity.union(&new_cores) > 0; - updated.then_some(affinity) - }); - } - - /// Returns the `Core` to which the specified worker thread may be pinned, if any. - pub fn get_core_affinity(&self, thread_index: usize) -> Option { - self.config - .affinity - .get() - .and_then(|cores| cores.get(thread_index)) - } - } -} - -#[inline] -fn task_recv_timeout(rx: &TaskReceiver) -> Option, NextTaskError>> { - // time to wait in between polling the retry queue and then the task receiver - const RECV_TIMEOUT: Duration = Duration::from_secs(1); - match rx.recv_timeout(RECV_TIMEOUT) { - Ok(task) => Some(Ok(task)), - Err(RecvTimeoutError::Disconnected) => Some(Err(NextTaskError::Disconnected)), - Err(RecvTimeoutError::Timeout) => None, - } -} - -#[cfg(not(feature = "batching"))] -mod no_batching { - use super::{NextTaskError, Shared, Task}; - use crate::bee::{Queen, Worker}; - - impl> Shared { - /// Tries to receive a task from the input channel. - /// - /// Returns an error if the channel has disconnected. Returns `None` if a task is not - /// received within the timeout period (currently hard-coded to 1 second). - #[inline] - pub(super) fn get_task(&self, _: usize) -> Option, NextTaskError>> { - super::task_recv_timeout(&self.task_rx.lock()) - } - } -} - -#[cfg(feature = "batching")] -mod batching { - use super::{NextTaskError, Shared, Task}; - use crate::bee::{Queen, Worker}; - use crossbeam_queue::ArrayQueue; - use std::collections::HashSet; - use std::time::Duration; - - impl> Shared { - pub(super) fn init_local_queues(&self, start_index: usize, end_index: usize) { - let mut local_queues = self.local_queues.write(); - assert_eq!(local_queues.len(), start_index); - // ArrayQueue cannot be zero-sized - let queue_size = self.batch_size().max(1); - (start_index..end_index).for_each(|_| local_queues.push(ArrayQueue::new(queue_size))) - } - - /// Returns the local queue batch size. - pub fn batch_size(&self) -> usize { - self.config.batch_size.get().unwrap_or_default() - } - - /// Changes the local queue batch size. This requires allocating a new queue for each - /// worker thread. - /// - /// Note: this method will block the current thread waiting for all local queues to become - /// writable; if `batch_size` is less than the current batch size, this method will also - /// block while any thread's queue length is > `batch_size` before moving the elements. 
- pub fn set_batch_size(&self, batch_size: usize) -> usize { - // update the batch size first so any new threads spawned won't need to have their - // queues resized - let prev_batch_size = self - .config - .batch_size - .try_set(batch_size) - .unwrap_or_default(); - if prev_batch_size == batch_size { - return prev_batch_size; - } - let num_threads = self.config.num_threads.get_or_default(); - if num_threads == 0 { - return prev_batch_size; - } - // keep track of which queues need to be resized - // TODO: this method could cause a hang if one of the worker threads is stuck - we - // might want to keep track of each queue's size and if we don't see it shrink within - // a certain amount of time, we give up on that thread and leave it with a wrong-sized - // queue (which should never cause a panic) - let mut to_resize: HashSet = (0..num_threads).collect(); - // iterate until we've resized them all - loop { - // scope the mutable access to local_queues - { - let mut local_queues = self.local_queues.write(); - to_resize.retain(|thread_index| { - let queue = if let Some(queue) = local_queues.get_mut(*thread_index) { - queue - } else { - return false; - }; - if queue.len() > batch_size { - return true; - } - let new_queue = ArrayQueue::new(batch_size); - while let Some(task) = queue.pop() { - if let Err(task) = new_queue.push(task) { - // for some reason we can't push the task to the new queue - // this should never happen, but just in case we turn it into - // an unprocessed outcome - self.abandon_task(task); - } - } - // this is safe because the worker threads can't get readable access to the - // queue while this thread holds the lock - let old_queue = std::mem::replace(queue, new_queue); - assert!(old_queue.is_empty()); - false - }); - } - if to_resize.is_empty() { - return prev_batch_size; - } else { - // short sleep to give worker threads the chance to pull from their queues - std::thread::sleep(Duration::from_millis(10)); - } - } - } - - /// Returns the next task from the local queue if there are any, otherwise attempts to - /// fetch at least 1 and up to `batch_size + 1` tasks from the input channel and puts all - /// but the first one into the local queue. 
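The strategy this doc comment describes — serve from the thread-local queue first, then refill opportunistically from the shared channel — is the heart of the batching feature and survives the refactor as `local-batch`. A simplified analogue using only `std` types (the deleted code below uses `crossbeam_queue::ArrayQueue` rather than a `VecDeque`, and wraps tasks and errors in the crate's own types):

```rust
use std::collections::VecDeque;
use std::sync::mpsc::Receiver;

// Worker-side task fetch under local batching: pop from the thread's own
// queue on a hit; on a miss, block for one task and then drain up to
// `batch_size` more from the shared channel without blocking.
fn next_task<T>(local: &mut VecDeque<T>, global: &Receiver<T>, batch_size: usize) -> Option<T> {
    if let Some(task) = local.pop_front() {
        return Some(task);
    }
    let first = global.recv().ok()?; // block for at least one task
    // opportunistically refill the local queue; try_iter never blocks
    local.extend(global.try_iter().take(batch_size));
    Some(first)
}
```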
- #[inline] - pub(super) fn get_task( - &self, - thread_index: usize, - ) -> Option, NextTaskError>> { - let local_queue = &self.local_queues.read()[thread_index]; - // pop from the local queue if it has any tasks - if !local_queue.is_empty() { - return Some(Ok(local_queue.pop().unwrap())); - } - // otherwise pull at least 1 and up to `batch_size + 1` tasks from the input channel - let task_rx = self.task_rx.lock(); - // wait for the next task from the receiver - let first = super::task_recv_timeout(&task_rx); - // if we fail after trying to get one, don't keep trying to fill the queue - if first.as_ref().map(|result| result.is_ok()).unwrap_or(false) { - let batch_size = self.batch_size(); - // batch size 0 means batching is disabled - if batch_size > 0 { - // otherwise try to take up to `batch_size` tasks from the input channel - // and add them to the local queue, but don't block if the input channel - // is empty - for result in task_rx - .try_iter() - .take(batch_size) - .map(|task| local_queue.push(task)) - { - if let Err(task) = result { - // for some reason we can't push the task to the local queue; - // this should never happen, but just in case we turn it into an - // unprocessed outcome and stop iterating - self.abandon_task(task); - break; - } - } - } - } - first - } - } -} - -/// Sends each `Task` to its associated outcome sender (if any) or stores it in `outcomes`. -/// TODO: if `outcomes` were `DerefMut` then the argument could either be a mutable referece or -/// a Lazy that aquires the lock on first access. Unfortunately, rust's Lazy does not support -/// mutable access, so we'd need something like OnceCell or OnceMutex. -fn send_or_store>>( - tasks: I, - outcomes: &mut HashMap>, -) { - tasks.for_each(|task| { - let (outcome, outcome_tx) = task.into_unprocessed(); - if let Some(outcome) = if let Some(tx) = outcome_tx { - tx.try_send_msg(outcome) - } else { - Some(outcome) - } { - outcomes.insert(*outcome.task_id(), outcome); - } - }); -} - -#[derive(thiserror::Error, Debug)] -pub enum NextTaskError { - #[error("Task receiver disconnected")] - Disconnected, - #[error("The hive has been poisoned")] - Poisoned, - #[error("Task counter has invalid state")] - InvalidCounter(CounterError), -} - -#[cfg(not(feature = "retry"))] -mod no_retry { - use super::{NextTaskError, Task}; - use crate::atomic::Atomic; - use crate::bee::{Queen, Worker}; - use crate::hive::{Husk, Shared}; - - impl> Shared { - /// Returns the next queued `Task`. The thread blocks until a new task becomes available, and - /// since this requires holding a lock on the task `Reciever`, this also blocks any other - /// threads that call this method. Returns `None` if the task `Sender` has hung up and there - /// are no tasks queued. Also returns `None` if the cancelled flag has been set. - pub fn next_task(&self, thread_index: usize) -> Result, NextTaskError> { - loop { - self.resume_gate.wait_while(|| self.is_suspended()); - - if self.is_poisoned() { - return Err(NextTaskError::Poisoned); - } - - if let Some(result) = self.get_task(thread_index) { - break result; - } - } - .and_then(|task| match self.num_tasks.transfer(1) { - Ok(_) => Ok(task), - Err(e) => { - // poison the hive so it can't be used anymore - self.poison(); - Err(NextTaskError::InvalidCounter(e)) - } - }) - } - - /// Drains all queued tasks, converts them into `Outcome::Unprocessed` outcomes, and tries - /// to send them or (if the task does not have a sender, or if the send fails) stores them - /// in the `outcomes` map. 
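The `send_or_store` helper above embodies a fallback this module uses in several places: deliver an outcome to the task's own channel if one exists and the receiver is still listening, otherwise retain the outcome locally. Isolated, with `std::sync::mpsc` standing in for the crate's channel abstraction:

```rust
use std::collections::HashMap;
use std::sync::mpsc::Sender;

// Try to deliver an outcome to the task's sender, if any; on a missing
// sender or a failed send (receiver dropped), retain the outcome instead.
fn send_or_store<O>(
    task_id: usize,
    outcome: O,
    tx: Option<&Sender<O>>,
    store: &mut HashMap<usize, O>,
) {
    let undelivered = match tx {
        Some(tx) => tx.send(outcome).err().map(|e| e.0), // SendError returns the value
        None => Some(outcome),
    };
    if let Some(outcome) = undelivered {
        store.insert(task_id, outcome);
    }
}
```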
- pub fn drain_tasks_into_unprocessed(&self) { - let task_rx = self.task_rx.lock(); - let mut outcomes = self.outcomes.lock(); - super::send_or_store(task_rx.try_iter(), &mut outcomes); - } - - /// Consumes this `Shared` and returns a `Husk` containing the `Queen`, panic count, stored - /// outcomes, and all configuration information necessary to create a new `Hive`. Any queued - /// tasks are converted into `Outcome::Unprocessed` outcomes and either sent to the task's - /// sender or (if there is no sender, or the send fails) stored in the `outcomes` map. - pub fn try_into_husk(self) -> Husk { - let task_rx = self.task_rx.into_inner(); - let mut outcomes = self.outcomes.into_inner(); - super::send_or_store(task_rx.try_iter(), &mut outcomes); - Husk::new( - self.config.into_unsync(), - self.queen.into_inner(), - self.num_panics.into_inner(), - outcomes, - ) - } - } -} - -#[cfg(feature = "retry")] -mod retry { - use super::NextTaskError; - use crate::atomic::Atomic; - use crate::bee::{Context, Queen, Worker}; - use crate::hive::delay::DelayQueue; - use crate::hive::{Husk, OutcomeSender, Shared, Task}; - use std::time::{Duration, Instant}; - - impl> Shared { - /// Initializes the retry queues worker threads in the specified range. - pub(super) fn init_retry_queues(&self, start_index: usize, end_index: usize) { - let mut retry_queues = self.retry_queues.write(); - assert_eq!(retry_queues.len(), start_index); - (start_index..end_index).for_each(|_| retry_queues.push(DelayQueue::default())) - } - - /// Returns `true` if the hive is configured to retry tasks and the `attempt` field of the - /// given `ctx` is less than the maximum number of retries. - pub fn can_retry(&self, ctx: &Context) -> bool { - self.config - .max_retries - .get() - .map(|max_retries| ctx.attempt() < max_retries) - .unwrap_or(false) - } - - /// Adds a task to the retry queue with a delay based on `ctx.attempt()`. - pub fn queue_retry( - &self, - thread_index: usize, - input: W::Input, - ctx: Context, - outcome_tx: Option>, - ) -> Option { - // compute the delay - let delay = self - .config - .retry_factor - .get() - .map(|retry_factor| { - 2u64.checked_pow(ctx.attempt() - 1) - .and_then(|multiplier| { - retry_factor - .checked_mul(multiplier) - .or(Some(u64::MAX)) - .map(Duration::from_nanos) - }) - .unwrap() - }) - .unwrap_or_default(); - // try to queue the task - let task = Task::new(input, ctx, outcome_tx); - self.num_tasks - .increment_left(1) - .expect("overflowed queued task counter"); - if let Some(queue) = self.retry_queues.read().get(thread_index) { - queue.push(task, delay) - } else { - Err(task) - } - // if unable to queue the task, abandon it - .map_err(|task| self.abandon_task(task)) - .ok() - } - - /// Returns the next queued `Task`. The thread blocks until a new task becomes available, - /// and since this requires holding a lock on the task `Reciever`, this also blocks any - /// other threads that call this method. Returns an error if the task `Sender` has hung up - /// and there are no tasks queued for retry. 
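The delay arithmetic in `queue_retry` above is plain exponential backoff, and per the changelog it carries over to the new thread-local retry queues. A distilled version, using saturating math in place of the original's checked operations (attempt counts are `u8` as of 0.3.0):

```rust
use std::time::Duration;

// Delay grows as retry_factor * 2^(attempt - 1), saturating rather than
// overflowing; mirrors the arithmetic in `queue_retry` above.
fn retry_delay(retry_factor_nanos: u64, attempt: u8) -> Duration {
    let exp = u32::from(attempt.saturating_sub(1));
    let multiplier = 2u64.checked_pow(exp).unwrap_or(u64::MAX);
    Duration::from_nanos(retry_factor_nanos.saturating_mul(multiplier))
}

fn main() {
    assert_eq!(retry_delay(1_000_000, 1), Duration::from_millis(1));
    assert_eq!(retry_delay(1_000_000, 3), Duration::from_millis(4));
}
```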
- pub fn next_task(&self, thread_index: usize) -> Result, NextTaskError> { - loop { - self.resume_gate.wait_while(|| self.is_suspended()); - - if self.is_poisoned() { - return Err(NextTaskError::Poisoned); - } - - if let Some(task) = self - .retry_queues - .read() - .get(thread_index) - .and_then(|queue| queue.try_pop()) - { - break Ok(task); - } - - if let Some(result) = self.get_task(thread_index) { - break result; - } - } - .and_then(|task| match self.num_tasks.transfer(1) { - Ok(_) => Ok(task), - Err(e) => Err(NextTaskError::InvalidCounter(e)), - }) - } - - /// Drains all queued tasks, converts them into `Outcome::Unprocessed` outcomes, and tries - /// to send them or (if the task does not have a sender, or if the send fails) stores them - /// in the `outcomes` map. - pub fn drain_tasks_into_unprocessed(&self) { - let mut outcomes = self.outcomes.lock(); - let task_rx = self.task_rx.lock(); - super::send_or_store(task_rx.try_iter(), &mut outcomes); - let mut retry_queue = self.retry_queues.write(); - for queue in retry_queue.iter_mut() { - super::send_or_store(queue.drain(), &mut outcomes); - } - } - - /// Consumes this `Shared` and returns a `Husk` containing the `Queen`, panic count, stored - /// outcomes, and all configuration information necessary to create a new `Hive`. Any queued - /// tasks are converted into `Outcome::Unprocessed` outcomes and either sent to the task's - /// sender or (if there is no sender, or the send fails) stored in the `outcomes` map. - pub fn try_into_husk(self) -> Husk { - let mut outcomes = self.outcomes.into_inner(); - let task_rx = self.task_rx.into_inner(); - super::send_or_store(task_rx.try_iter(), &mut outcomes); - let mut retry_queue = self.retry_queues.into_inner(); - for queue in retry_queue.iter_mut() { - super::send_or_store(queue.drain(), &mut outcomes); - } - Husk::new( - self.config.into_unsync(), - self.queen.into_inner(), - self.num_panics.into_inner(), - outcomes, - ) - } - } -} - -#[cfg(test)] -mod tests { - use crate::bee::stock::ThunkWorker; - use crate::bee::DefaultQueen; - - type VoidThunkWorker = ThunkWorker<()>; - type VoidThunkWorkerShared = super::Shared>; - - #[test] - fn test_sync_shared() { - fn assert_sync() {} - assert_sync::(); - } - - #[test] - fn test_send_shared() { - fn assert_send() {} - assert_send::(); - } -} diff --git a/src/hive/task.rs b/src/hive/task.rs deleted file mode 100644 index 03d071b..0000000 --- a/src/hive/task.rs +++ /dev/null @@ -1,34 +0,0 @@ -use super::{Outcome, OutcomeSender, Task}; -use crate::bee::{Context, TaskId, Worker}; - -impl Task { - /// Creates a new `Task`. - pub fn new(input: W::Input, ctx: Context, outcome_tx: Option>) -> Self { - Task { - input, - ctx, - outcome_tx, - } - } - - /// Returns the ID of this task. - pub fn id(&self) -> TaskId { - self.ctx.task_id() - } - - /// Consumes this `Task` and returns a tuple `(input, context, outcome_tx)`. - pub fn into_parts(self) -> (W::Input, Context, Option>) { - (self.input, self.ctx, self.outcome_tx) - } - - /// Consumes this `Task` and returns a `Outcome::Unprocessed` outcome with the input and ID, - /// and the outcome sender. - pub fn into_unprocessed(self) -> (Outcome, Option>) { - let (input, ctx, outcome_tx) = self.into_parts(); - let outcome = Outcome::Unprocessed { - input, - task_id: ctx.task_id(), - }; - (outcome, outcome_tx) - } -} diff --git a/src/hive/util.rs b/src/hive/util.rs new file mode 100644 index 0000000..2aeca6c --- /dev/null +++ b/src/hive/util.rs @@ -0,0 +1,34 @@ +//! Internal utilities for the `hive` module. 
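The new module's first utility, `unwrap_arc` (next), waits for the other owners of an `Arc` to drop before taking the inner value. Stripped of the `Backoff` and timeout machinery, the loop reduces to this sketch (not the crate's code; fixed sleep, no deadline):

```rust
use std::sync::Arc;
use std::{thread, time::Duration};

// Spin until this handle is the only owner, then take the inner value.
fn unwrap_when_unique<T>(mut arc: Arc<T>) -> T {
    loop {
        match Arc::try_unwrap(arc) {
            Ok(inner) => return inner,
            Err(shared) => {
                arc = shared; // still shared; keep waiting
                thread::sleep(Duration::from_millis(1));
            }
        }
    }
}

fn main() {
    let a = Arc::new(5);
    let b = Arc::clone(&a);
    let t = thread::spawn(move || drop(b)); // the other owner goes away
    assert_eq!(unwrap_when_unique(a), 5);
    t.join().unwrap();
}
```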
+use crossbeam_utils::Backoff;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+const MAX_WAIT: Duration = Duration::from_secs(10);
+
+/// Utility function to loop (with exponential backoff) waiting for other references to `arc` to
+/// drop so it can be unwrapped into its inner value.
+///
+/// If `arc` cannot be unwrapped within a certain amount of time (with an exponentially increasing
+/// gap between each iteration), `arc` is returned as an error.
+pub fn unwrap_arc<T>(mut arc: Arc<T>) -> Result<T, Arc<T>> {
+    // wait for the other holders of the `Arc` to drop, then take ownership of the inner value
+    let mut backoff = None::<Backoff>;
+    let mut start = None::<Instant>;
+    loop {
+        arc = match std::sync::Arc::try_unwrap(arc) {
+            Ok(inner) => {
+                return Ok(inner);
+            }
+            Err(arc) if start.is_none() => {
+                let _ = start.insert(Instant::now());
+                arc
+            }
+            Err(arc) if Instant::now() - start.unwrap() > MAX_WAIT => return Err(arc),
+            Err(arc) => {
+                backoff.get_or_insert_with(Backoff::new).spin();
+                arc
+            }
+        };
+    }
+}
diff --git a/src/hive/weighted.rs b/src/hive/weighted.rs
new file mode 100644
index 0000000..3931535
--- /dev/null
+++ b/src/hive/weighted.rs
@@ -0,0 +1,314 @@
+//! Weighted value used for task submission with the `local-batch` feature.
+use num::ToPrimitive;
+use std::ops::Deref;
+
+/// Wraps a value of type `T` and an associated weight.
+#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Weighted<T> {
+    value: T,
+    weight: u32,
+}
+
+impl<T> Weighted<T> {
+    /// Creates a new `Weighted` instance with the given value and weight.
+    pub fn new<P: ToPrimitive>(value: T, weight: P) -> Self {
+        Self {
+            value,
+            weight: weight.to_u32().unwrap(),
+        }
+    }
+
+    /// Creates a new `Weighted` instance with the given value and weight obtained from calling the
+    /// given function on `value`.
+    pub fn from_fn<F>(value: T, f: F) -> Self
+    where
+        F: FnOnce(&T) -> u32,
+    {
+        let weight = f(&value);
+        Self::new(value, weight)
+    }
+
+    /// Creates a new `Weighted` instance with the given value and weight obtained by converting
+    /// the value into a `u32`.
+    pub fn from_identity(value: T) -> Self
+    where
+        T: ToPrimitive + Clone,
+    {
+        let weight = value.clone().to_u32().unwrap();
+        Self::new(value, weight)
+    }
+
+    /// Returns the weight associated with this `Weighted` value.
+    pub fn weight(&self) -> u32 {
+        self.weight
+    }
+
+    /// Returns the value and weight as a tuple.
+    pub fn into_parts(self) -> (T, u32) {
+        (self.value, self.weight)
+    }
+}
+
+impl<T> Deref for Weighted<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+impl<T> From<T> for Weighted<T> {
+    fn from(value: T) -> Self {
+        Self::new(value, 0)
+    }
+}
+
+impl<T, P: ToPrimitive> From<(T, P)> for Weighted<T> {
+    fn from((value, weight): (T, P)) -> Self {
+        Self::new(value, weight)
+    }
+}
+
+/// Extends `IntoIterator` to add methods to convert any iterator into an iterator over `Weighted`
+/// items.
+pub trait WeightedIteratorExt: IntoIterator + Sized {
+    /// Converts this iterator over (T, P) items into an iterator over `Weighted<T>` items with
+    /// weights set to `P::to_u32()`.
+    fn into_weighted<T, P>(self) -> impl Iterator<Item = Weighted<T>>
+    where
+        P: ToPrimitive,
+        Self: IntoIterator<Item = (T, P)>,
+    {
+        self.into_iter()
+            .map(|(value, weight)| Weighted::new(value, weight))
+    }
+
+    /// Converts this iterator into an iterator over `Weighted<Self::Item>` with weights set to 0.
+    fn into_default_weighted(self) -> impl Iterator<Item = Weighted<Self::Item>> {
+        self.into_iter().map(Into::into)
+    }
+
+    /// Converts this iterator into an iterator over `Weighted<Self::Item>` with weights set to
+    /// `weight`.
+    fn into_const_weighted(self, weight: u32) -> impl Iterator<Item = Weighted<Self::Item>> {
+        self.into_iter()
+            .map(move |item| Weighted::new(item, weight))
+    }
+
+    /// Converts this iterator into an iterator over `Weighted<Self::Item>` with weights set to
+    /// `item.clone().to_u32()`.
+    fn into_identity_weighted(self) -> impl Iterator<Item = Weighted<Self::Item>>
+    where
+        Self::Item: ToPrimitive + Clone,
+    {
+        self.into_iter().map(Weighted::from_identity)
+    }
+
+    /// Zips this iterator with `weights` and converts each tuple into a `Weighted<Self::Item>`
+    /// with the weight set to the corresponding value from `weights`.
+    fn into_weighted_zip<P, W>(self, weights: W) -> impl Iterator<Item = Weighted<Self::Item>>
+    where
+        P: ToPrimitive + Clone + Default,
+        W: IntoIterator<Item = P>,
+        W::IntoIter: 'static,
+    {
+        self.into_iter()
+            .zip(weights.into_iter().chain(std::iter::repeat(P::default())))
+            .map(Into::into)
+    }
+
+    /// Converts this iterator into an iterator over `Weighted<Self::Item>` with weights set to
+    /// the result of calling `f` on each item.
+    fn into_weighted_with<F>(self, f: F) -> impl Iterator<Item = Weighted<Self::Item>>
+    where
+        F: Fn(&Self::Item) -> u32,
+    {
+        self.into_iter().map(move |item| {
+            let weight = f(&item);
+            Weighted::new(item, weight)
+        })
+    }
+
+    /// Converts this `ExactSizeIterator` over (T, P) items into an `ExactSizeIterator` over
+    /// `Weighted<T>` items with weights set to `P::to_u32()`.
+    fn into_weighted_exact<T, P>(self) -> impl ExactSizeIterator<Item = Weighted<T>>
+    where
+        P: ToPrimitive,
+        Self: IntoIterator<Item = (T, P)>,
+        Self::IntoIter: ExactSizeIterator + 'static,
+    {
+        self.into_iter()
+            .map(|(value, weight)| Weighted::new(value, weight))
+    }
+
+    /// Converts this `ExactSizeIterator` into an `ExactSizeIterator` over `Weighted<Self::Item>`
+    /// with weights set to 0.
+    fn into_default_weighted_exact(self) -> impl ExactSizeIterator<Item = Weighted<Self::Item>>
+    where
+        Self::IntoIter: ExactSizeIterator + 'static,
+    {
+        self.into_iter().map(Into::into)
+    }
+
+    /// Converts this `ExactSizeIterator` into an `ExactSizeIterator` over `Weighted<Self::Item>`
+    /// with weights set to `weight`.
+    fn into_const_weighted_exact(
+        self,
+        weight: u32,
+    ) -> impl ExactSizeIterator<Item = Weighted<Self::Item>>
+    where
+        Self::IntoIter: ExactSizeIterator + 'static,
+    {
+        self.into_iter()
+            .map(move |item| Weighted::new(item, weight))
+    }
+
+    /// Converts this `ExactSizeIterator` into an `ExactSizeIterator` over `Weighted<Self::Item>`
+    /// with weights set to `item.clone().to_u32()`.
+    fn into_identity_weighted_exact(self) -> impl ExactSizeIterator<Item = Weighted<Self::Item>>
+    where
+        Self::Item: ToPrimitive + Clone,
+        Self::IntoIter: ExactSizeIterator + 'static,
+    {
+        self.into_iter().map(Weighted::from_identity)
+    }
+
+    /// Converts this `ExactSizeIterator` into an `ExactSizeIterator` over `Weighted<Self::Item>`
+    /// with weights set to the result of calling `f` on each item.
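One detail of `into_weighted_zip` above is worth isolating before the remaining `ExactSize` variants: chaining `repeat(P::default())` onto the weight stream pads it indefinitely, so `zip` never truncates the item iterator when the weights run short. In a standalone form (hypothetical helper name, `u32` weights as in the trait):

```rust
// Pair items with a possibly shorter weight stream, padding missing weights
// with a default instead of dropping the trailing items.
fn zip_with_default_weights<T>(
    items: impl IntoIterator<Item = T>,
    weights: impl IntoIterator<Item = u32>,
) -> Vec<(T, u32)> {
    items
        .into_iter()
        .zip(weights.into_iter().chain(std::iter::repeat(0)))
        .collect()
}

fn main() {
    let pairs = zip_with_default_weights(["a", "b", "c"], [7]);
    assert_eq!(pairs, vec![("a", 7), ("b", 0), ("c", 0)]);
}
```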
+ fn into_weighted_exact_with( + self, + f: F, + ) -> impl ExactSizeIterator> + where + Self::IntoIter: ExactSizeIterator + 'static, + F: Fn(&Self::Item) -> u32, + { + self.into_iter().map(move |item| { + let weight = f(&item); + Weighted::new(item, weight) + }) + } +} + +impl WeightedIteratorExt for T {} + +#[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] +mod tests { + use super::*; + + #[test] + fn test_new() { + let weighted = Weighted::new(42, 10); + assert_eq!(*weighted, 42); + assert_eq!(weighted.weight(), 10); + assert_eq!(weighted.into_parts(), (42, 10)); + } + + #[test] + fn test_from_fn() { + let weighted = Weighted::from_fn(42, |x| x * 2); + assert_eq!(*weighted, 42); + assert_eq!(weighted.weight(), 84); + } + + #[test] + fn test_from_identity() { + let weighted = Weighted::from_identity(42); + assert_eq!(*weighted, 42); + assert_eq!(weighted.weight(), 42); + } + + #[test] + fn test_from_unweighted() { + let weighted = Weighted::from(42); + assert_eq!(*weighted, 42); + assert_eq!(weighted.weight(), 0); + } + + #[test] + fn test_from_tuple() { + let weighted: Weighted = Weighted::from((42, 10)); + assert_eq!(*weighted, 42); + assert_eq!(weighted.weight(), 10); + assert_eq!(weighted.into_parts(), (42, 10)); + } + + #[test] + fn test_into_weighted() { + (0..10) + .map(|i| (i, i)) + .into_weighted() + .for_each(|weighted| assert_eq!(weighted.weight(), weighted.value)); + } + + #[test] + fn test_into_default_weighted() { + (0..10) + .into_default_weighted() + .for_each(|weighted| assert_eq!(weighted.weight(), 0)); + } + + #[test] + fn test_into_identity_weighted() { + (0..10) + .into_identity_weighted() + .for_each(|weighted| assert_eq!(weighted.weight(), weighted.value)); + } + + #[test] + fn test_into_const_weighted() { + (0..10) + .into_const_weighted(5) + .for_each(|weighted| assert_eq!(weighted.weight(), 5)); + } + + #[test] + fn test_into_weighted_zip() { + (0..10) + .into_weighted_zip(10..20) + .for_each(|weighted| assert_eq!(weighted.weight(), weighted.value + 10)); + } + + #[test] + fn test_into_weighted_with() { + (0..10) + .into_weighted_with(|i| i * 2) + .for_each(|weighted| assert_eq!(weighted.weight(), weighted.value * 2)); + } + + #[test] + fn test_into_weighted_exact() { + (0..10) + .map(|i| (i, i)) + .into_weighted_exact() + .for_each(|weighted| assert_eq!(weighted.weight(), weighted.value)); + } + + #[test] + fn test_into_default_weighted_exact() { + (0..10) + .into_default_weighted_exact() + .for_each(|weighted| assert_eq!(weighted.weight(), 0)); + } + + #[test] + fn test_into_identity_weighted_exact() { + (0..10) + .into_identity_weighted_exact() + .for_each(|weighted| assert_eq!(weighted.weight(), weighted.value)); + } + + #[test] + fn test_into_const_weighted_exact() { + (0..10) + .into_const_weighted_exact(5) + .for_each(|weighted| assert_eq!(weighted.weight(), 5)); + } + + #[test] + fn test_into_weighted_exact_with() { + (0..10) + .into_weighted_exact_with(|i| i * 2) + .for_each(|weighted| assert_eq!(weighted.weight(), weighted.value * 2)); + } +} diff --git a/src/lib.rs b/src/lib.rs index a60facf..bc73071 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg_attr(coverage_nightly, feature(coverage_attribute))] //! A Rust library that provides a [thread pool](https://en.wikipedia.org/wiki/Thread_pool) //! implementation designed to execute the same operation in parallel on any number of inputs (this //! is sometimes called a "worker pool"). @@ -7,9 +8,24 @@ //! 
* Operations are defined by implementing the [`Worker`](crate::bee::Worker) trait.
 //! * A [`Builder`](crate::hive::Builder) is used to configure and create a worker pool
 //!   called a [`Hive`](crate::hive::Hive).
+//! * `Hive` is generic over:
+//!   * The type of [`Queen`](crate::bee::Queen), which creates `Worker` instances
+//!   * The type of [`TaskQueues`](crate::hive::TaskQueues), which provides the global and
+//!     worker thread-local queues for managing tasks; there are currently two implementations:
+//!     * Channel: A
+//!       [`crossbeam` channel](https://docs.rs/crossbeam-channel/latest/crossbeam_channel/)
+//!       is used to send tasks from the `Hive` to the worker threads. *This is a good choice
+//!       for most workloads*.
+//!     * Workstealing: A
+//!       [`crossbeam_deque::Injector`](https://docs.rs/crossbeam-deque/latest/crossbeam_deque/struct.Injector.html)
+//!       is used to submit tasks and serves as a global queue. Worker threads each have their
+//!       own local queue and can take tasks either from the global queue or steal from other
+//!       workers' local queues if their own queue is empty. This is a good choice for workloads
+//!       that are either highly variable from task to task (in terms of processing time), or
+//!       are fork-join in nature (i.e., tasks that submit sub-tasks).
 //! * The `Hive` creates a `Worker` instance for each thread in the pool.
 //! * Each thread in the pool continually:
-//!   * Recieves a task from an input [`channel`](::std::sync::mpsc::channel),
+//!   * Receives a task from an input queue,
 //!   * Calls its `Worker`'s [`apply`](crate::bee::Worker::apply) method on the input, and
 //!   * Produces an [`Outcome`](crate::hive::Outcome).
 //! * Depending on which of `Hive`'s methods are called to submit a task (or batch of tasks), the
@@ -20,8 +36,9 @@
 //! * Clone an instance of a `Worker` that implements [`Clone`]
 //! * Call the [`create()`](crate::bee::Queen::create) method on a worker factory that
 //!   implements the [`Queen`](crate::bee::Queen) trait.
-//! * Both `Worker`s and `Queen`s may be stateful, i.e., `Worker::apply()` and `Queen::create()`
-//!   both take `&mut self`.
+//! * A `Worker` may be stateful, i.e., `Worker::apply()` takes `&mut self`.
+//! * While a `Queen` is not stateful, a [`QueenMut`](crate::bee::QueenMut) may be (i.e., its
+//!   `create()` method takes `&mut self`).
 //! * Although it is strongly recommended to avoid `panic`s in worker threads (and thus, within
 //!   `Worker` implementations), the `Hive` does automatically restart any threads that panic.
 //! * A `Hive` may be [`suspend`](crate::hive::Hive::suspend)ed and
@@ -35,9 +52,14 @@
 //! * The following optional features are provided via feature flags:
 //!   * `affinity`: worker threads may be pinned to CPU cores to minimize the overhead of
 //!     context-switching.
+//!   * `local-batch` (>=0.3.0): worker threads take batches of tasks from the global input queue
+//!     and add them to a local queue, which may alleviate thread contention, especially when
+//!     there are many short-lived tasks.
+//!     * Tasks may be [`Weighted`](crate::hive::Weighted) to enable balancing unevenly sized
+//!       tasks between worker threads.
 //!   * `retry`: Tasks that fail due to transient errors (e.g., temporarily unavailable resources)
-//!     may be retried a set number of times, with an optional, exponentially increasing delay
-//!     between retries.
+//!     may be retried up to a set number of times, with an optional, exponentially increasing
+//!     delay between retries.
 //! * Several alternative `channel` implementations are supported:
 //!
* [`crossbeam`](https://docs.rs/crossbeam/latest/crossbeam/)
 //!   * [`flume`](https://github.com/zesterer/flume)
@@ -56,24 +78,32 @@
 //! * Do at least one of the following:
 //!   * Implement [`Default`] for your worker
 //!   * Implement [`Clone`] for your worker
-//!   * Create a custom worker fatory that implements the [`Queen`](crate::bee::Queen)
-//!     trait
+//!   * Create a custom worker factory that implements the [`Queen`](crate::bee::Queen)
+//!     or [`QueenMut`](crate::bee::QueenMut) trait
 //! 2. A [`Hive`](crate::hive::Hive) to execute your tasks. Your options are:
 //!   * Use one of the convenience methods in the [`util`] module (see Example 1 below)
-//!   * Create a `Hive` manually using [`Builder`](crate::hive::Builder) (see Examples 2
+//!   * Create a `Hive` manually using a [`Builder`](crate::hive::Builder) (see Examples 2
 //!     and 3 below)
-//!     * [`Builder::new()`](crate::hive::Builder::new) creates an empty `Builder`
-//!     * [`Builder::default()`](crate::hive::Builder::default) creates a `Builder`
+//!     * [`OpenBuilder`](crate::hive::OpenBuilder) is the most general builder
+//!       * [`OpenBuilder::empty()`](crate::hive::OpenBuilder::empty) creates an empty `OpenBuilder`
+//!       * [`OpenBuilder::default()`](crate::hive::OpenBuilder::default) creates an `OpenBuilder`
 //!         with the global default settings (which may be changed using the functions in the
 //!         [`hive`] module, e.g., `beekeeper::hive::set_num_threads_default(4)`).
-//!     * Use one of the `build_*` methods to build the `Hive`:
+//!     * The builder must be specialized for the `Queen` and `TaskQueues` types:
 //!       * If you have a `Worker` that implements `Default`, use
-//!         [`build_with_default::<MyWorker>()`](crate::hive::Builder::build_with_default)
+//!         [`with_worker_default::<MyWorker>()`](crate::hive::OpenBuilder::with_worker_default)
 //!       * If you have a `Worker` that implements `Clone`, use
-//!         [`build_with(MyWorker::new())`](crate::hive::Builder::build_with)
+//!         [`with_worker(MyWorker::new())`](crate::hive::OpenBuilder::with_worker)
 //!       * If you have a custom `Queen`, use
-//!         [`build_default::<MyQueen>()`](crate::hive::Builder::build_default) if it implements
-//!         `Default`, otherwise use [`build(MyQueen::new())`](crate::hive::Builder::build)
+//!         [`with_queen_default::<MyQueen>()`](crate::hive::OpenBuilder::with_queen_default)
+//!         if it implements `Default`, otherwise use
+//!         [`with_queen(MyQueen::new())`](crate::hive::OpenBuilder::with_queen)
+//!       * If instead your queen implements `QueenMut`, use
+//!         [`with_queen_mut_default::<MyQueenMut>()`](crate::hive::OpenBuilder::with_queen_mut_default)
+//!         or [`with_queen_mut(MyQueenMut::new())`](crate::hive::OpenBuilder::with_queen_mut)
+//!     * Use [`with_channel_queues`](crate::hive::OpenBuilder::with_channel_queues) or
+//!       [`with_workstealing_queues`](crate::hive::OpenBuilder::with_workstealing_queues)
+//!       to specify the `TaskQueues` type.
 //!     * Note that [`Builder::num_threads()`](crate::hive::Builder::num_threads) must be set
 //!       to a non-zero value, otherwise the built `Hive` will not start any worker threads
 //!       until you call the [`Hive::grow()`](crate::hive::Hive::grow) method.
@@ -97,8 +127,7 @@
 //! * The methods with the `_send` suffix accept a channel [`Sender`](crate::channel::Sender) and
 //!   send the `Outcome`s to that channel as they are completed
 //! * The methods with the `_store` suffix store the `Outcome`s in the `Hive`; these may be
-//!   retrieved later using the [`Hive::take_stored()`](crate::hive::Hive::take_stored) method,
-//!   using one of the `remove*` methods (which requires
+//!
retrieved later using one of the `remove*` methods (which requires //! [`OutcomeStore`](crate::hive::OutcomeStore) to be in scope), or by //! using one of the methods on [`Husk`](crate::hive::Husk) after shutting down the `Hive` using //! [`Hive::try_into_husk()`](crate::hive::Hive::try_into_husk). @@ -151,22 +180,23 @@ //! # fn main() { //! // create a hive to process `Thunk`s - no-argument closures with the //! // same return type (`i32`) -//! let hive = Builder::new() +//! let hive = channel_builder(false) //! .num_threads(4) //! .thread_name("thunk_hive") -//! .build_with_default::>(); +//! .with_worker_default::>() +//! .build(); //! //! // return results to your own channel... //! let (tx, rx) = outcome_channel(); //! let _ = hive.swarm_send( -//! (0..10).map(|i: i32| Thunk::of(move || i * i)), +//! (0..10).map(|i: i32| Thunk::from(move || i * i)), //! tx //! ); //! assert_eq!(285, rx.into_outputs().take(10).sum()); //! //! // return results as an iterator... //! let total = hive -//! .swarm_unordered((0..10).map(|i: i32| Thunk::of(move || i * -i))) +//! .swarm_unordered((0..10).map(|i: i32| Thunk::from(move || i * -i))) //! .into_outputs() //! .sum(); //! assert_eq!(-285, total); @@ -220,7 +250,7 @@ //! fn apply( //! &mut self, //! input: Self::Input, -//! _: &Context +//! _: &Context //! ) -> WorkerResult { //! self.write_char(input).map_err(|error| { //! ApplyError::Fatal { input: Some(input), error } @@ -242,7 +272,7 @@ //! } //! } //! -//! impl Queen for CatQueen { +//! impl QueenMut for CatQueen { //! type Kind = CatWorker; //! //! fn create(&mut self) -> Self::Kind { @@ -277,9 +307,10 @@ //! //! # fn main() { //! // build the Hive -//! let hive = Builder::new() +//! let hive = channel_builder(false) //! .num_threads(4) -//! .build_default::(); +//! .with_queen_mut_default::() +//! .build(); //! //! // prepare inputs //! let inputs = (0..8).map(|i| 97 + i); @@ -294,15 +325,15 @@ //! }) //! .into_bytes(); //! -//! // verify the output - note that `swarm` ensures the outputs are in -//! // the same order as the inputs +//! // verify the output - note that `swarm` ensures the outputs are in the same order +//! // as the inputs //! assert_eq!(output, b"abcdefgh"); //! -//! // shutdown the hive, use the Queen to wait on child processes, and -//! // report errors -//! let (mut queen, _outcomes) = hive.try_into_husk().unwrap().into_parts(); +//! // shutdown the hive, use the Queen to wait on child processes, and report errors; +//! // the `_outcomes` will be empty as we did not use any `_store` methods +//! let (queen, _outcomes) = hive.try_into_husk(false).unwrap().into_parts(); //! let (wait_ok, wait_err): (Vec<_>, Vec<_>) = -//! queen.wait_for_all().into_iter().partition(Result::is_ok); +//! queen.into_inner().wait_for_all().into_iter().partition(Result::is_ok); //! if !wait_err.is_empty() { //! panic!( //! "Error(s) occurred while waiting for child processes: {:?}", @@ -323,9 +354,9 @@ //! } //! # } //! ``` - mod atomic; #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod barrier; pub mod bee; mod boxed; diff --git a/src/panic.rs b/src/panic.rs index d6adef6..06f76ad 100644 --- a/src/panic.rs +++ b/src/panic.rs @@ -1,19 +1,21 @@ //! Data type that wraps a `panic` payload. use super::boxed::BoxedFnOnce; +use derive_more::Debug; use std::any::Any; -use std::fmt::Debug; +use std::fmt; use std::panic::AssertUnwindSafe; pub type PanicPayload = Box; /// Wraps a payload from a caught `panic` with an optional `detail`. 
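Before the struct definition that follows: `Panic::try_call` runs a closure under `catch_unwind` and pairs any panic payload with caller-supplied detail. A minimal sketch of that shape (not the crate's exact API):

```rust
use std::any::Any;
use std::panic::{self, AssertUnwindSafe};

type PanicPayload = Box<dyn Any + Send + 'static>;

// Run `f`, catching any panic and surfacing the payload plus the detail.
fn try_call<T, O>(
    detail: Option<T>,
    f: impl FnOnce() -> O,
) -> Result<O, (PanicPayload, Option<T>)> {
    panic::catch_unwind(AssertUnwindSafe(f)).map_err(|payload| (payload, detail))
}

fn main() {
    assert!(try_call(Some("ok"), || 1 + 1).is_ok());
    let err = try_call(Some("task 7"), || -> u32 { panic!("boom") }).unwrap_err();
    assert_eq!(err.1, Some("task 7")); // the detail rides along with the payload
}
```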
#[derive(Debug)] -pub struct Panic { +pub struct Panic { + #[debug("")] payload: PanicPayload, detail: Option, } -impl Panic { +impl Panic { /// Attempts to call the provided function `f` and catches any panic. Returns either the return /// value of the function or a `Panic` created from the panic payload and the provided `detail`. pub fn try_call O>(detail: Option, f: F) -> Result { @@ -44,15 +46,16 @@ impl Panic { } } -impl PartialEq for Panic { +impl PartialEq for Panic { fn eq(&self, other: &Self) -> bool { (*self.payload).type_id() == (*other.payload).type_id() && self.detail == other.detail } } -impl Eq for Panic {} +impl Eq for Panic {} #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use super::Panic; use std::fmt::Debug; diff --git a/src/util.rs b/src/util.rs index aab909d..1bbb385 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,10 +1,10 @@ //! Utility functions for simple use cases. //! //! In all cases, the number of threads is specified as a parameter, and the function takes care of -//! creating the [`Hive`](crate::hive::Hive), submitting tasks, collecting results, and shutting -//! down the `Hive` properly. +//! creating the [`Hive`](crate::hive::Hive) (with channel-based task queues), submitting tasks, +//! collecting results, and shutting down the `Hive` properly. use crate::bee::stock::{Caller, OnceCaller}; -use crate::hive::{Builder, Outcome, OutcomeBatch}; +use crate::hive::{Builder, ChannelBuilder, Outcome, OutcomeBatch, TaskQueuesBuilder}; use std::fmt::Debug; /// Convenience function that creates a `Hive` with `num_threads` worker threads that execute the @@ -28,9 +28,10 @@ where Inputs: IntoIterator, F: FnMut(I) -> O + Send + Sync + Clone + 'static, { - Builder::default() + ChannelBuilder::default() .num_threads(num_threads) - .build_with(Caller::of(f)) + .with_worker(Caller::from(f)) + .build() .map(inputs) .map(Outcome::unwrap) .collect() @@ -67,14 +68,16 @@ where Inputs: IntoIterator, F: FnMut(I) -> Result + Send + Sync + Clone + 'static, { - Builder::default() + ChannelBuilder::default() .num_threads(num_threads) - .build_with(OnceCaller::of(f)) + .with_worker(OnceCaller::from(f)) + .build() .map(inputs) .into() } #[cfg(test)] +#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use crate::hive::{Outcome, OutcomeStore}; @@ -90,11 +93,7 @@ mod tests { 4, 0..100, |i| { - if i == 50 { - Err("Fiddy!") - } else { - Ok(i + 1) - } + if i == 50 { Err("Fiddy!") } else { Ok(i + 1) } }, ); assert!(result.has_failures()); @@ -115,7 +114,7 @@ pub use retry::try_map_retryable; mod retry { use crate::bee::stock::RetryCaller; use crate::bee::{ApplyError, Context}; - use crate::hive::{Builder, OutcomeBatch}; + use crate::hive::{Builder, ChannelBuilder, OutcomeBatch, TaskQueuesBuilder}; use std::fmt::Debug; /// Convenience function that creates a `Hive` with `num_threads` worker threads that execute the @@ -145,7 +144,7 @@ mod retry { /// ``` pub fn try_map_retryable( num_threads: usize, - max_retries: u32, + max_retries: u8, inputs: Inputs, f: F, ) -> OutcomeBatch> @@ -154,17 +153,19 @@ mod retry { O: Send + Sync + 'static, E: Send + Sync + Debug + 'static, Inputs: IntoIterator, - F: FnMut(I, &Context) -> Result> + Send + Sync + Clone + 'static, + F: FnMut(I, &Context) -> Result> + Send + Sync + Clone + 'static, { - Builder::default() + ChannelBuilder::default() .num_threads(num_threads) .max_retries(max_retries) - .build_with(RetryCaller::of(f)) + .with_worker(RetryCaller::from(f)) + .build() .map(inputs) .into() } #[cfg(test)] + 
#[cfg_attr(coverage_nightly, coverage(off))] mod tests { use crate::bee::ApplyError; use crate::hive::{Outcome, OutcomeStore};
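Taken together, the `util.rs` changes above show the new task-submission flow end to end. Adapted into a free-standing example using the same call chain as the `map` utility's body (note that the `Builder` and `TaskQueuesBuilder` traits must be in scope, exactly as the new imports show):

```rust
use beekeeper::bee::stock::Caller;
use beekeeper::hive::{Builder, ChannelBuilder, Outcome, TaskQueuesBuilder};

fn main() {
    // Channel-backed hive with four worker threads; the stock `Caller` worker
    // wraps a cloneable closure, one clone per thread.
    let mut outputs: Vec<i32> = ChannelBuilder::default()
        .num_threads(4)
        .with_worker(Caller::from(|i: i32| i * 2))
        .build()
        .map(0..10)
        .map(Outcome::unwrap)
        .collect();
    outputs.sort_unstable(); // don't rely on output ordering in this sketch
    assert_eq!(outputs, (0..10).map(|i| i * 2).collect::<Vec<_>>());
}
```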