From 571375934d370f0bbfe7afb6aefa5a2505fb7746 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sat, 21 Feb 2026 23:41:51 +0000
Subject: [PATCH 1/9] Clean up perf/ benchmarks

- Remove broken Turing integration tests (p0.jl, p1.jl, p2.jl) and the
  runtests.jl that included them.
- Use Chairmarks instead of BenchmarkTools (it's just much faster)
- Improve printing of results
- Modernise the script in general.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 perf/Project.toml |  15 ++----
 perf/benchmark.jl | 124 +++++++++++++++++++---------------------------
 perf/p0.jl        |  39 ---------------
 perf/p1.jl        |  36 --------------
 perf/p2.jl        |  63 -----------------------
 perf/runtests.jl  |   4 --
 6 files changed, 54 insertions(+), 227 deletions(-)
 delete mode 100644 perf/p0.jl
 delete mode 100644 perf/p1.jl
 delete mode 100644 perf/p2.jl
 delete mode 100644 perf/runtests.jl

diff --git a/perf/Project.toml b/perf/Project.toml
index 6522964d..52ae69ca 100644
--- a/perf/Project.toml
+++ b/perf/Project.toml
@@ -1,14 +1,7 @@
 [deps]
-AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
-AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
+Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
-[compat]
-julia = "1.10.8"
-
-[targets]
-test = ["Test", "BenchmarkTools"]
+[sources]
+Libtask = {path = "../"}
diff --git a/perf/benchmark.jl b/perf/benchmark.jl
index 74618aac..b69cdd5f 100644
--- a/perf/benchmark.jl
+++ b/perf/benchmark.jl
@@ -1,55 +1,50 @@
 using Libtask
 using LinearAlgebra
-using BenchmarkTools
-
-####################################################################
-
-function benchmark_driver!(f, x...; f_displayname=string(f))
-    x = (x..., nothing)
-
-    println("benchmarking $(f_displayname)...")
-    tf = Libtask.TapedTask(nothing, f, x...)
-
-    print("  Run Original Function:")
-    @btime $f($(x)...)
-    GC.gc()
-
-    print("  Run TapedTask: ")
-    x = (x[1:(end - 1)]..., produce)
-    # show the number of produce calls inside `f`
-    function f_task(f, x; verbose=false)
-        tt = TapedTask(nothing, f, x...)
-        c = 0
+using Chairmarks: @b
+
+# Each benchmark function takes a `maybe_produce` as its last argument, defaulting to
+# `identity` (a no-op). The benchmark driver calls the function twice: once with the
+# default (measuring raw performance) and once via a TapedTask that passes `produce` as the
+# last argument (measuring the overhead of the produce/consume machinery).
+function benchmark(f, x...)
+    printstyled(string(f), "\n"; bold=true)
+
+    # Baseline: call f directly with maybe_produce=identity (the default).
+    baseline = @b $f($(x)...)
+
+    # TapedTask: pass `produce` so every `maybe_produce(...)` call yields a value.
+    function f_via_task(f, x)
+        tt = TapedTask(nothing, f, x..., produce)
+        n = 0
         while consume(tt) !== nothing
-            c += 1
+            n += 1
         end
-        return verbose && print("#produce=", c, "; ")
+        return n
     end
-    # Note that we need to pass `f` instead of `tf` to avoid
-    #  default continuation in `TapedTask` constructor, see, e.g.
-    #  https://github.com/TuringLang/Libtask.jl/pull/135
-    f_task(f, x; verbose=true) # print #produce calls
-    @btime $f_task($f, $x)
-    GC.gc()
-    return nothing
+    n_produces = f_via_task(f, x)
+    taped = @b $f_via_task($f, $x)
+
+    noun = n_produces == 1 ? "produce" : "produces"
+    label = "taped ($n_produces $noun)"
+    print(rpad("baseline", length(label)), "  ")
+    display(baseline)
+    print(label, "  ")
+    display(taped)
+    ratio = round(taped.time / baseline.time; digits=1)
+    println(rpad("ratio", length(label)), "  ", "$(ratio)x")
+    println()
 end
 
-####################################################################
-
-function rosenbrock(x, callback=nothing)
+function rosenbrock(x, maybe_produce=identity)
     i = x[2:end]
-    j = x[1:(end - 1)]
+    j = x[1:(end-1)]
     ret = sum((1 .- j) .^ 2 + 100 * (i - j .^ 2) .^ 2)
-    callback !== nothing && callback(ret)
+    maybe_produce(ret)
     return ret
 end
+benchmark(rosenbrock, rand(100_000))
 
-x = rand(100000)
-benchmark_driver!(rosenbrock, x)
-
-####################################################################
-
-function ackley(x::AbstractVector, callback=nothing)
+function ackley(x::AbstractVector, maybe_produce=identity)
     a, b, c = 20.0, -0.2, 2.0 * π
     len_recip = inv(length(x))
     sum_sqrs = zero(eltype(x))
@@ -57,48 +52,29 @@ function ackley(x::AbstractVector, callback=nothing)
     for i in x
         sum_cos += cos(c * i)
         sum_sqrs += i^2
-        callback !== nothing && callback(sum_sqrs)
+        maybe_produce(sum_sqrs)
     end
-    return (
-        -a * exp(b * sqrt(len_recip * sum_sqrs)) - exp(len_recip * sum_cos) +
-        a +
-        MathConstants.e
-    )
+    return -a * exp(b * sqrt(len_recip * sum_sqrs)) - exp(len_recip * sum_cos) + a + MathConstants.e
 end
-
-x = rand(100000)
-benchmark_driver!(ackley, x)
-
-####################################################################
-function generate_matrix_test(n)
-    return (x, callback=nothing) -> begin
-        # @assert length(x) == 2n^2 + n
-        a = reshape(x[1:(n^2)], n, n)
-        b = reshape(x[(n^2 + 1):(2n^2)], n, n)
-        ret = log.((a * b) + a - b)
-        callback !== nothing && callback(ret)
-        return ret
-    end
+benchmark(ackley, rand(100_000))
+
+function matrix_test(x, maybe_produce=identity)
+    n = 100
+    a = reshape(x[1:(n^2)], n, n)
+    b = reshape(x[(n^2+1):(2n^2)], n, n)
+    ret = log.((a * b) + a - b)
+    maybe_produce(ret)
+    return ret
 end
+benchmark(matrix_test, collect(1.0:(2*100^2+100)))
 
-n = 100
-matrix_test = generate_matrix_test(n)
-x = collect(1.0:(2n^2 + n))
-benchmark_driver!(matrix_test, x; f_displayname="matrix_test")
-
-####################################################################
 relu(x) = log.(1.0 .+ exp.(x))
 sigmoid(n) = 1.0 / (1.0 + exp(-n))
-
-function neural_net(w1, w2, w3, x1, callback=nothing)
+function neural_net(w1, w2, w3, x1, maybe_produce=identity)
     x2 = relu(w1 * x1)
     x3 = relu(w2 * x2)
     ret = sigmoid(LinearAlgebra.dot(w3, x3))
-    callback !== nothing && callback(ret)
+    maybe_produce(ret)
     return ret
 end
-
-xs = (randn(10, 10), randn(10, 10), randn(10), rand(10))
-benchmark_driver!(neural_net, xs...)
-
-println("done")
+benchmark(neural_net, randn(10, 10), randn(10, 10), randn(10), rand(10))
diff --git a/perf/p0.jl b/perf/p0.jl
deleted file mode 100644
index c317b885..00000000
--- a/perf/p0.jl
+++ /dev/null
@@ -1,39 +0,0 @@
-using Random
-using Libtask
-using Turing, DynamicPPL, AdvancedPS
-using BenchmarkTools
-
-@model gdemo(x, y) = begin
-    # Assumptions
-    σ ~ InverseGamma(2, 3)
-    μ ~ Normal(0, sqrt(σ))
-    # Observations
-    x ~ Normal(μ, sqrt(σ))
-    y ~ Normal(μ, sqrt(σ))
-end
-
-# Case 1: Sample from the prior.
-rng = MersenneTwister()
-m = Turing.Inference.TracedModel(gdemo(1.5, 2.0), SampleFromPrior(), VarInfo(), rng)
-f = m.evaluator[1];
-args = m.evaluator[2:end];
-
-println("Directly call...")
-@btime f(args...)
-# (2.0, VarInfo (2 variables (μ, σ), dimension 2; logp: -6.162))
-println("TapedTask construction...")
-t = @btime TapedTask(f, args...)
-println("Run a tape...")
-@btime t.tf(args...)
-
-# Case 2: SMC sampler
-m = Turing.Inference.TracedModel(gdemo(1.5, 2.0), Sampler(SMC(50)), VarInfo(), rng)
-f = m.evaluator[1];
-args = m.evaluator[2:end];
-
-println("Directly call...")
-@btime f(args...)
-println("TapedTask construction...")
-t = @btime TapedTask(f, args...)
-println("Run a tape...")
-@btime t.tf(args...)
diff --git a/perf/p1.jl b/perf/p1.jl
deleted file mode 100644
index 34797f3c..00000000
--- a/perf/p1.jl
+++ /dev/null
@@ -1,36 +0,0 @@
-using Turing, Test, AbstractMCMC, DynamicPPL, Random
-
-import AbstractMCMC.AbstractSampler
-
-function check_numerical(chain, symbols::Vector, exact_vals::Vector; atol=0.2, rtol=0.0)
-    for (sym, val) in zip(symbols, exact_vals)
-        E = val isa Real ? mean(chain[sym]) : vec(mean(chain[sym]; dims=1))
-        @info (symbol=sym, exact=val, evaluated=E)
-        @test E ≈ val atol = atol rtol = rtol
-    end
-end
-
-function check_MoGtest_default(chain; atol=0.2, rtol=0.0)
-    return check_numerical(
-        chain,
-        [:z1, :z2, :z3, :z4, :mu1, :mu2],
-        [1.0, 1.0, 2.0, 2.0, 1.0, 4.0];
-        atol=atol,
-        rtol=rtol,
-    )
-end
-
-@model gdemo_d(x, y) = begin
-    s ~ InverseGamma(2, 3)
-    m ~ Normal(0, sqrt(s))
-    x ~ Normal(m, sqrt(s))
-    y ~ Normal(m, sqrt(s))
-    return s, m
-end
-
-alg = CSMC(15)
-chain = sample(gdemo_d(1.5, 2.0), alg, 5_000)
-
-@show chain
-
-check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.1)
diff --git a/perf/p2.jl b/perf/p2.jl
deleted file mode 100644
index 6a883411..00000000
--- a/perf/p2.jl
+++ /dev/null
@@ -1,63 +0,0 @@
-using Turing, Test, AbstractMCMC, DynamicPPL, Random, Turing.RandomMeasures, Libtask
-
-@model infiniteGMM(x) = begin
-    # Hyper-parameters, i.e. concentration parameter and parameters of H.
-    α = 1.0
-    μ0 = 0.0
-    σ0 = 1.0
-
-    # Define random measure, e.g. Dirichlet process.
-    rpm = DirichletProcess(α)
-
-    # Define the base distribution, i.e. expected value of the Dirichlet process.
-    H = Normal(μ0, σ0)
-
-    # Latent assignment.
-    z = tzeros(Int, length(x))
-
-    # Locations of the infinitely many clusters.
-    μ = tzeros(Float64, 0)
-
-    for i in 1:length(x)
-
-        # Number of clusters.
-        K = maximum(z)
-        nk = Vector{Int}(map(k -> sum(z .== k), 1:K))
-
-        # Draw the latent assignment.
-        z[i] ~ ChineseRestaurantProcess(rpm, nk)
-
-        # Create a new cluster?
-        if z[i] > K
-            push!(μ, 0.0)
-
-            # Draw location of new cluster.
-            μ[z[i]] ~ H
-        end
-
-        # Draw observation.
-        x[i] ~ Normal(μ[z[i]], 1.0)
-    end
-end
-
-# Generate some test data.
-rng = Random.seed!(1)
-
-data = vcat(randn(rng, 10), randn(rng, 10) .- 5, randn(rng, 10) .+ 10)
-data .-= mean(data)
-data /= std(data)
-
-# MCMC sampling
-Random.seed!(rng, 2)
-iterations = 500
-model_fun = infiniteGMM(data)
-
-m = Turing.Inference.TracedModel(model_fun, Sampler(SMC(50)), VarInfo(), rng)
-f = m.evaluator[1]
-args = m.evaluator[2:end]
-
-t = TapedTask(f, args...)
-
-t.tf(args...)
-
-@show Libtask.result(t.tf)
diff --git a/perf/runtests.jl b/perf/runtests.jl
deleted file mode 100644
index d7a0f0c7..00000000
--- a/perf/runtests.jl
+++ /dev/null
@@ -1,4 +0,0 @@
-include("benchmark.jl")
-include("p0.jl")
-include("p1.jl")
-include("p2.jl")

From 1e1e60bff3e7da7dc3a79f9b9ac9e61a168ac0ea Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sat, 21 Feb 2026 23:43:05 +0000
Subject: [PATCH 2/9] Move perf/ to benchmarks/ which is more descriptive

---
 {perf => benchmarks}/Project.toml | 0
 {perf => benchmarks}/benchmark.jl | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename {perf => benchmarks}/Project.toml (100%)
 rename {perf => benchmarks}/benchmark.jl (100%)

diff --git a/perf/Project.toml b/benchmarks/Project.toml
similarity index 100%
rename from perf/Project.toml
rename to benchmarks/Project.toml
diff --git a/perf/benchmark.jl b/benchmarks/benchmark.jl
similarity index 100%
rename from perf/benchmark.jl
rename to benchmarks/benchmark.jl

From dc8c38631438c3ab6f178805f07cf481fafa5438 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sat, 21 Feb 2026 23:48:31 +0000
Subject: [PATCH 3/9] update CI workflows

---
 .github/workflows/Benchmark.yml               | 20 ++++++++
 .../BenchmarksAndMicroIntegration.yml         | 49 -------------------
 .github/workflows/CI.yml                      | 36 ++++++++++++++
 .github/workflows/Testing.yaml                | 45 -----------------
 4 files changed, 56 insertions(+), 94 deletions(-)
 create mode 100644 .github/workflows/Benchmark.yml
 delete mode 100644 .github/workflows/BenchmarksAndMicroIntegration.yml
 create mode 100644 .github/workflows/CI.yml
 delete mode 100644 .github/workflows/Testing.yaml

diff --git a/.github/workflows/Benchmark.yml b/.github/workflows/Benchmark.yml
new file mode 100644
index 00000000..27741a19
--- /dev/null
+++ b/.github/workflows/Benchmark.yml
@@ -0,0 +1,20 @@
+name: Benchmarks
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  benchmarks:
+    name: Benchmarks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: "1"
+      - uses: julia-actions/cache@v2
+      - name: Run benchmarks
+        run: julia --project=benchmarks -e 'using Pkg; Pkg.instantiate(); include("benchmarks/benchmark.jl")'
diff --git a/.github/workflows/BenchmarksAndMicroIntegration.yml b/.github/workflows/BenchmarksAndMicroIntegration.yml
deleted file mode 100644
index 5a982753..00000000
--- a/.github/workflows/BenchmarksAndMicroIntegration.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-name: Benchmarks and MicroIntegration
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-
-jobs:
-  test:
-    name: Benchmarks and MicroIntegration
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        package:
-          - {user: TuringLang, repo: Turing.jl, ref: main}
-
-    steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
-        with:
-          version: 1
-          arch: x64
-      - uses: julia-actions/julia-buildpkg@latest
-      - name: Clone Downstream
-        uses: actions/checkout@v2
-        with:
-          repository: ${{ matrix.package.user }}/${{ matrix.package.repo }}
-          ref: ${{ matrix.package.ref }}
-          path: downstream
-      - name: Load this and run the downstream tests
-        shell: julia --color=yes --project=perf {0}
-        run: |
-          using Pkg
-          try
-            # force it to use this PR's version of the package
-            Pkg.develop(PackageSpec(path="downstream"))
-            Pkg.develop(PackageSpec(path="."))  # resolver may fail with main deps
-            Pkg.update()
-            include(pwd()*"/perf/runtests.jl")
-          catch err
-            err isa Pkg.Resolve.ResolverError || rethrow()
-            # If we can't resolve that means this is incompatible by SemVer and this is fine
-            # It means we marked this as a breaking change, so we don't need to worry about
-            # Mistakenly introducing a breaking change, as we have intentionally made one
-            @info "Not compatible with this release. No problem." exception=err
-            exit(0)  # Exit immediately, as a success
-          end
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 00000000..7e7f6ed8
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,36 @@
+name: Libtask Testing
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    continue-on-error: false
+    strategy:
+      matrix:
+        version:
+          - 'min'
+          - '1.11'
+          - '1'
+          # TODO(mhauru) Reenable the below once there is a 'pre' version different from '1'.
+          # - 'pre'
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macOS-latest
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: ${{ matrix.version }}
+
+      - uses: julia-actions/cache@v2
+
+      - uses: julia-actions/julia-buildpkg@v1
+
+      - uses: julia-actions/julia-runtest@v1
diff --git a/.github/workflows/Testing.yaml b/.github/workflows/Testing.yaml
deleted file mode 100644
index 36011f6a..00000000
--- a/.github/workflows/Testing.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: Libtask Testing
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-jobs:
-  test:
-    runs-on: ${{ matrix.os }}
-    continue-on-error: true # ${{ matrix.version == 'nightly' }}
-    strategy:
-      matrix:
-        version:
-          - 'min'
-          - '1'
-          # TODO(mhauru) Reenable the below once there is a 'pre' version different from '1'.
-          # - 'pre'
-        os:
-          - ubuntu-latest
-          - windows-latest
-          - macOS-latest
-        arch:
-          - x64
-          - x86
-        exclude:
-          - os: macOS-latest
-            arch: x86
-    steps:
-      - uses: actions/checkout@v4
-      - uses: julia-actions/setup-julia@v2
-        with:
-          version: ${{ matrix.version }}
-          arch: ${{ matrix.arch }}
-      - uses: actions/cache@v4
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
-      - uses: julia-actions/julia-buildpkg@latest
-      - uses: julia-actions/julia-runtest@latest

From f4eddb142c3b6d9b1f224840e0cac7f5ec059a19 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sat, 21 Feb 2026 23:49:01 +0000
Subject: [PATCH 4/9] Format

---
 benchmarks/benchmark.jl | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/benchmarks/benchmark.jl b/benchmarks/benchmark.jl
index b69cdd5f..82f56251 100644
--- a/benchmarks/benchmark.jl
+++ b/benchmarks/benchmark.jl
@@ -33,11 +33,12 @@ function benchmark(f, x...)
     ratio = round(taped.time / baseline.time; digits=1)
     println(rpad("ratio", length(label)), "  ", "$(ratio)x")
     println()
+    return nothing
 end
 
 function rosenbrock(x, maybe_produce=identity)
     i = x[2:end]
-    j = x[1:(end-1)]
+    j = x[1:(end - 1)]
     ret = sum((1 .- j) .^ 2 + 100 * (i - j .^ 2) .^ 2)
     maybe_produce(ret)
     return ret
@@ -54,19 +55,21 @@ function ackley(x::AbstractVector, maybe_produce=identity)
         sum_sqrs += i^2
         maybe_produce(sum_sqrs)
     end
-    return -a * exp(b * sqrt(len_recip * sum_sqrs)) - exp(len_recip * sum_cos) + a + MathConstants.e
+    return -a * exp(b * sqrt(len_recip * sum_sqrs)) - exp(len_recip * sum_cos) +
+           a +
+           MathConstants.e
 end
 benchmark(ackley, rand(100_000))
 
 function matrix_test(x, maybe_produce=identity)
     n = 100
     a = reshape(x[1:(n^2)], n, n)
-    b = reshape(x[(n^2+1):(2n^2)], n, n)
+    b = reshape(x[(n^2 + 1):(2n^2)], n, n)
     ret = log.((a * b) + a - b)
     maybe_produce(ret)
     return ret
 end
-benchmark(matrix_test, collect(1.0:(2*100^2+100)))
+benchmark(matrix_test, collect(1.0:(2 * 100^2 + 100)))
 
 relu(x) = log.(1.0 .+ exp.(x))
 sigmoid(n) = 1.0 / (1.0 + exp(-n))

From 83cb7ba5bc4aa6948574b9196a2578ed9659f3c6 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sat, 21 Feb 2026 23:58:38 +0000
Subject: [PATCH 5/9] Modernise integration test workflow and drop Turing.jl

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/IntegrationTest.yml | 43 +++++++++++----------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/IntegrationTest.yml b/.github/workflows/IntegrationTest.yml
index 8c1db475..54c6402c 100644
--- a/.github/workflows/IntegrationTest.yml
+++ b/.github/workflows/IntegrationTest.yml
@@ -1,4 +1,4 @@
-name: IntegrationTest
+name: Integration Test (AdvancedPS)
 
 on:
   push:
@@ -8,42 +8,33 @@ on:
 
 jobs:
   test:
-    name: ${{ matrix.package.repo }}
+    name: AdvancedPS.jl
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        package:
-          - {user: TuringLang, repo: AdvancedPS.jl, ref: main}
-          - {user: TuringLang, repo: Turing.jl, ref: main}
-
     steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v6
+
+      - uses: julia-actions/setup-julia@v2
         with:
-          version: 1
-          arch: x64
-      - uses: julia-actions/julia-buildpkg@latest
-      - name: Clone Downstream
-        uses: actions/checkout@v2
+          version: "1"
+
+      - uses: julia-actions/cache@v2
+
+      - uses: actions/checkout@v6
         with:
-          repository: ${{ matrix.package.user }}/${{ matrix.package.repo }}
-          ref: ${{ matrix.package.ref }}
+          repository: TuringLang/AdvancedPS.jl
+          ref: main
           path: downstream
-      - name: Load this and run the downstream tests
+
+      - name: Run downstream tests with this Libtask
         shell: julia --color=yes --project=downstream {0}
         run: |
           using Pkg
           try
-            # force it to use this PR's version of the package
-            Pkg.develop(PackageSpec(path="."))  # resolver may fail with main deps
+            Pkg.develop(PackageSpec(path="."))
             Pkg.update()
-            Pkg.test()  # resolver may fail with test time deps
+            Pkg.test()
           catch err
             err isa Pkg.Resolve.ResolverError || rethrow()
-            # If we can't resolve that means this is incompatible by SemVer and this is fine
-            # It means we marked this as a breaking change, so we don't need to worry about
-            # Mistakenly introducing a breaking change, as we have intentionally made one
             @info "Not compatible with this release. No problem." exception=err
-            exit(0)  # Exit immediately, as a success
+            exit(0)
           end

From 210f1fefd7fb8c8ee1aaacde435ecad908201f60 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sun, 22 Feb 2026 00:16:53 +0000
Subject: [PATCH 6/9] Add Turing integration tests for SMC and PG samplers

Closes #208

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../{IntegrationTest.yml => AdvancedPS.yml}   |  0
 .github/workflows/Turing.yml                  | 56 +++++++++++++++++++
 test/integration/turing/Project.toml          |  8 +++
 test/integration/turing/main.jl               | 45 +++++++++++++++
 4 files changed, 109 insertions(+)
 rename .github/workflows/{IntegrationTest.yml => AdvancedPS.yml} (100%)
 create mode 100644 .github/workflows/Turing.yml
 create mode 100644 test/integration/turing/Project.toml
 create mode 100644 test/integration/turing/main.jl

diff --git a/.github/workflows/IntegrationTest.yml b/.github/workflows/AdvancedPS.yml
similarity index 100%
rename from .github/workflows/IntegrationTest.yml
rename to .github/workflows/AdvancedPS.yml
diff --git a/.github/workflows/Turing.yml b/.github/workflows/Turing.yml
new file mode 100644
index 00000000..f1e563d6
--- /dev/null
+++ b/.github/workflows/Turing.yml
@@ -0,0 +1,56 @@
+name: Turing integration tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+# needed to allow julia-actions/cache to delete old caches that it has created
+permissions:
+  actions: write
+  contents: read
+
+# Cancel existing tests on the same PR if a new commit is added to a pull request
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
+jobs:
+  turing:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os:
+          - ubuntu-latest
+          - macOS-latest
+          - windows-latest
+        version:
+          - 'min'
+          - '1.11'
+          - '1'
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: ${{ matrix.version }}
+
+      - uses: julia-actions/cache@v2
+
+      - name: Instantiate
+        id: instantiate
+        working-directory: test/integration/turing
+        continue-on-error: true
+        run: julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()'
+
+      - name: Report incompatibility
+        if: steps.instantiate.outcome == 'failure'
+        run: |
+          echo "::warning::Turing is incompatible with the current version of Libtask. Skipping integration tests."
+          echo "This likely means Turing has a compat bound on Libtask that excludes the currently checked out version."
+
+      - name: Run Turing integration tests
+        if: steps.instantiate.outcome == 'success'
+        working-directory: test/integration/turing
+        run: julia --project=. --color=yes main.jl
diff --git a/test/integration/turing/Project.toml b/test/integration/turing/Project.toml
new file mode 100644
index 00000000..5c4ce38f
--- /dev/null
+++ b/test/integration/turing/Project.toml
@@ -0,0 +1,8 @@
+[deps]
+Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
+
+[sources]
+Libtask = {path = "../../../"}
diff --git a/test/integration/turing/main.jl b/test/integration/turing/main.jl
new file mode 100644
index 00000000..dfc9da50
--- /dev/null
+++ b/test/integration/turing/main.jl
@@ -0,0 +1,45 @@
+using Libtask: @might_produce
+using Turing: @model, sample, SMC, PG, Normal, mean
+using StableRNGs: StableRNG
+using Test: @test, @testset
+
+@model function f()
+    x ~ Normal()
+    y ~ Normal(x)
+    return 2.0 ~ Normal(y)
+end
+
+model = f()
+
+@testset "Turing integration" begin
+    @testset "SMC" begin
+        chain = sample(StableRNG(468), model, SMC(), 100; progress=false)
+        @test size(chain, 1) == 100
+        @test size(chain, 3) == 1
+    end
+
+    @testset "PG" begin
+        chain = sample(StableRNG(468), model, PG(10), 500; progress=false)
+        @test size(chain, 1) == 500
+        @test mean(chain[:x]) ≈ 2 / 3 atol = 0.2
+        @test mean(chain[:y]) ≈ 4 / 3 atol = 0.2
+    end
+
+    @testset "PG with keyword arguments" begin
+        @model function kwarg_demo(y; n=0.0)
+            x ~ Normal(n)
+            return y ~ Normal(x)
+        end
+
+        # Check that enabling `might_produce` does allow sampling
+        @might_produce kwarg_demo
+        chain = sample(StableRNG(468), kwarg_demo(5.0), PG(20), 500; progress=false)
+        @test mean(chain[:x]) ≈ 2.5 atol = 0.2
+
+        # Check that the keyword argument's value is respected
+        chain2 = sample(
+            StableRNG(468), kwarg_demo(5.0; n=10.0), PG(20), 500; progress=false
+        )
+        @test mean(chain2[:x]) ≈ 7.5 atol = 0.2
+    end
+end

From 8c66e9fef8b7831989aee850efa3b5b1be5206eb Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sun, 22 Feb 2026 00:20:45 +0000
Subject: [PATCH 7/9] increase iterations

---
 test/integration/turing/main.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/integration/turing/main.jl b/test/integration/turing/main.jl
index dfc9da50..087d390c 100644
--- a/test/integration/turing/main.jl
+++ b/test/integration/turing/main.jl
@@ -33,12 +33,12 @@ model = f()
 
         # Check that enabling `might_produce` does allow sampling
         @might_produce kwarg_demo
-        chain = sample(StableRNG(468), kwarg_demo(5.0), PG(20), 500; progress=false)
+        chain = sample(StableRNG(468), kwarg_demo(5.0), PG(20), 1000; progress=false)
         @test mean(chain[:x]) ≈ 2.5 atol = 0.2
 
         # Check that the keyword argument's value is respected
         chain2 = sample(
-            StableRNG(468), kwarg_demo(5.0; n=10.0), PG(20), 500; progress=false
+            StableRNG(468), kwarg_demo(5.0; n=10.0), PG(20), 1000; progress=false
         )
         @test mean(chain2[:x]) ≈ 7.5 atol = 0.2
     end

From d83165c8176deb0b202f758c5eabe91ae8353c47 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sun, 22 Feb 2026 00:21:01 +0000
Subject: [PATCH 8/9] disable fail fast

---
 .github/workflows/CI.yml     | 2 +-
 .github/workflows/Turing.yml | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 7e7f6ed8..a463533f 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -8,7 +8,7 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
-    continue-on-error: false
+    fail-fast: false
     strategy:
       matrix:
         version:
diff --git a/.github/workflows/Turing.yml b/.github/workflows/Turing.yml
index f1e563d6..9740617d 100644
--- a/.github/workflows/Turing.yml
+++ b/.github/workflows/Turing.yml
@@ -19,6 +19,7 @@ concurrency:
 jobs:
   turing:
     runs-on: ${{ matrix.os }}
+    fail-fast: false
     strategy:
       matrix:
         os:

From 4b76adf9fe4ef8259e517b3a3f59bd3420be6afc Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Sun, 22 Feb 2026 00:24:41 +0000
Subject: [PATCH 9/9] Fix fail-fast placement: move it under strategy

---
 .github/workflows/CI.yml     | 2 +-
 .github/workflows/Turing.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index a463533f..7450e3ab 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -8,8 +8,8 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
-    fail-fast: false
     strategy:
+      fail-fast: false
       matrix:
         version:
           - 'min'
diff --git a/.github/workflows/Turing.yml b/.github/workflows/Turing.yml
index 9740617d..28765c9c 100644
--- a/.github/workflows/Turing.yml
+++ b/.github/workflows/Turing.yml
@@ -19,8 +19,8 @@ concurrency:
 jobs:
   turing:
     runs-on: ${{ matrix.os }}
-    fail-fast: false
     strategy:
+      fail-fast: false
       matrix:
         os:
           - ubuntu-latest