Add an untimed per-epoch setup step to Bench (fluent API).

Usage: bench.setup(setupOp).run(op);

* Bench::setup() returns a detail::SetupRunner that stores the setup callable
  by value and the Bench by reference, so the runner must not outlive the
  Bench it was created from.
* Bench::run(Op&&) now forwards to the private Bench::runImpl() with a no-op
  setup callable; SetupRunner::run() forwards to the same runImpl() with the
  stored callable.
* runImpl() invokes the setup callable at the top of every pass of the
  measurement loop, before pc.beginMeasure() and before the clock is read.
* Adds src/test/unit_setup.cpp and registers it in src/test/CMakeLists.txt.

diff --git a/src/include/nanobench.h b/src/include/nanobench.h
index d233dcc..1f798b8 100644
--- a/src/include/nanobench.h
+++ b/src/include/nanobench.h
@@ -137,6 +137,11 @@ class Result;
 class Rng;
 class BigO;
 
+namespace detail {
+template <typename SetupOp>
+class SetupRunner;
+} // namespace detail
+
 /**
  * @brief Renders output from a mustache-like template and benchmark results.
  *
@@ -1007,7 +1012,21 @@ class Bench {
     Bench& config(Config const& benchmarkConfig);
     ANKERL_NANOBENCH(NODISCARD) Config const& config() const noexcept;
 
+    /**
+     * @brief Configure an untimed setup step per epoch (fluent API).
+     *
+     * Example: `bench.setup(...).run(...);`
+     */
+    template <typename SetupOp>
+    detail::SetupRunner<SetupOp> setup(SetupOp setupOp);
+
 private:
+    template <typename SetupOp, typename Op>
+    Bench& runImpl(SetupOp& setupOp, Op&& op);
+
+    template <typename SetupOp>
+    friend class detail::SetupRunner;
+
     Config mConfig{};
     std::vector<Result> mResults{};
 };
@@ -1207,14 +1226,44 @@ constexpr uint64_t Rng::rotl(uint64_t x, unsigned k) noexcept {
     return (x << k) | (x >> (64U - k));
 }
 
+namespace detail {
+
+template <typename SetupOp>
+class SetupRunner {
+public:
+    explicit SetupRunner(SetupOp setupOp, Bench& bench)
+        : mSetupOp(std::move(setupOp))
+        , mBench(bench) {}
+
+    template <typename Op>
+    ANKERL_NANOBENCH_NO_SANITIZE("integer")
+    Bench& run(Op&& op) {
+        return mBench.runImpl(mSetupOp, std::forward<Op>(op));
+    }
+
+private:
+    SetupOp mSetupOp;
+    Bench& mBench;
+};
+} // namespace detail
+
 template <typename Op>
 ANKERL_NANOBENCH_NO_SANITIZE("integer")
 Bench& Bench::run(Op&& op) {
+    auto setupOp = [] {};
+    return runImpl(setupOp, std::forward<Op>(op));
+}
+
+template <typename SetupOp, typename Op>
+ANKERL_NANOBENCH_NO_SANITIZE("integer")
+Bench& Bench::runImpl(SetupOp& setupOp, Op&& op) {
     // It is important that this method is kept short so the compiler can do better optimizations/ inlining of op()
     detail::IterationLogic iterationLogic(*this);
     auto& pc = detail::performanceCounters();
 
     while (auto n = iterationLogic.numIters()) {
+        setupOp();
+
         pc.beginMeasure();
         Clock::time_point const before = Clock::now();
         while (n-- > 0) {
@@ -1229,6 +1278,11 @@ Bench& Bench::run(Op&& op) {
     return *this;
 }
 
+template <typename SetupOp>
+detail::SetupRunner<SetupOp> Bench::setup(SetupOp setupOp) {
+    return detail::SetupRunner<SetupOp>(std::move(setupOp), *this);
+}
+
 // Performs all evaluations.
 template <typename Op>
 Bench& Bench::run(char const* benchmarkName, Op&& op) {
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index d544752..e7c918c 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -32,6 +32,7 @@ target_sources_local(nb PRIVATE
     unit_cold.cpp
     unit_exact_iters_and_epochs.cpp
     unit_romutrio.cpp
+    unit_setup.cpp
     unit_templates.cpp
     unit_timeunit.cpp
     unit_to_s.cpp
diff --git a/src/test/unit_setup.cpp b/src/test/unit_setup.cpp
new file mode 100644
index 0000000..bbc3e71
--- /dev/null
+++ b/src/test/unit_setup.cpp
@@ -0,0 +1,41 @@
+#include <nanobench.h>
+#include <thirdparty/doctest/doctest.h>
+
+#include <chrono>
+#include <thread>
+#include <vector>
+
+// NOLINTNEXTLINE
+TEST_CASE("unit_setup_time_is_excluded_from_measurement") {
+    size_t setupCalls = 0;
+    size_t benchCalls = 0;
+
+    ankerl::nanobench::Bench bench;
+    bench.output(nullptr).warmup(0).epochs(3).epochIterations(1).performanceCounters(false);
+    bench
+        .setup([&] {
+            ++setupCalls;
+            std::this_thread::sleep_for(std::chrono::milliseconds(50));
+        })
+        .run([&] { ++benchCalls; });
+
+    REQUIRE(setupCalls == 3);
+    REQUIRE(benchCalls == 3);
+
+    // Setup sleeps 50ms per epoch, but measured time should be near zero
+    double const elapsedS = bench.results().back().median(ankerl::nanobench::Result::Measure::elapsed);
+    REQUIRE(elapsedS < 0.01);
+}
+
+// NOLINTNEXTLINE
+TEST_CASE("unit_setup_runs_before_each_epoch") {
+    std::vector<char> callSequence;
+
+    ankerl::nanobench::Bench bench;
+    bench.output(nullptr).warmup(0).epochs(2).epochIterations(3).performanceCounters(false);
+    bench
+        .setup([&] { callSequence.push_back('S'); })
+        .run([&] { callSequence.push_back('R'); });
+
+    REQUIRE(callSequence == std::vector<char>{'S', 'R', 'R', 'R', 'S', 'R', 'R', 'R'});
+}