From e280ae100989b2d888246e7caade9a93f5611058 Mon Sep 17 00:00:00 2001
From: James O'Leary <jpohhhh@gmail.com>
Date: Sun, 1 Mar 2026 09:55:32 -0800
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20register=20setUp=20only=20once,=20el?=
 =?UTF-8?q?iminating=20O(N=C2=B2)=20accumulation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each call to goldenTest() was calling goldenTestAdapter.setUp(_setUpGoldenTests),
appending a new setUp callback to the current test group every time. With N golden
tests, all N callbacks ran before each of the 2N variant runs, producing 2N² total
setUp executions (e.g. 20,000 for 100 tests).

The fix guards the registration with a boolean so it happens exactly once.

Benchmarked against a real project (442 golden tests, 63 files, --concurrency=1):

| Metric         | Before (0.13.0) | After (fix) | Improvement |
|----------------|-----------------|-------------|-------------|
| Peak RAM       | 18.1 GB         | 3.2 GB      | 5.6×        |
| Wall time      | 212 s (3:33)    | 127 s (2:07)| 1.7×        |
| Test execution | 3:08            | 2:02        | 1.5×        |
| User CPU       | 180.8 s         | 85.8 s      | 2.1×        |
| System CPU     | 46.1 s          | 17.7 s      | 2.6×        |
| Page reclaims  | 12.7M           | 4.9M        | 2.6×        |

The system CPU drop (46s → 18s) reflects kernel time spent on memory management
(page faults, VM pressure) servicing the pathological allocation pattern. The O(N²)
rapid-fire async setUp calls were starving the GC of idle time, preventing it from
finalizing native rendering resources (Pictures, Images, surfaces) between test runs.

A regression test is included that intercepts setUpFn/testWidgetsFn to prove the
accumulation and its quadratic effect.
---
 lib/src/golden_test.dart                      |   6 +-
 .../golden_test_set_up_accumulation_test.dart | 164 ++++++++++++++++++
 2 files changed, 169 insertions(+), 1 deletion(-)
 create mode 100644 test/src/golden_test_set_up_accumulation_test.dart
diff --git a/lib/src/golden_test.dart b/lib/src/golden_test.dart
index acdacfc..5cc97df 100644
--- a/lib/src/golden_test.dart
+++ b/lib/src/golden_test.dart
@@ -12,6 +12,7 @@ import 'package:flutter_test/flutter_test.dart';
 import 'package:meta/meta.dart';
 
 final Set<String> _loadedFontFamilies = {};
+bool _goldenTestSetUpRegistered = false;
 
 /// Default golden test runner which uses the flutter test framework.
 const defaultGoldenTestRunner = FlutterGoldenTestRunner();
@@ -168,7 +169,10 @@ Future<void> goldenTest(
     currentPlatform: currentPlatform,
   );
 
-  goldenTestAdapter.setUp(_setUpGoldenTests);
+  if (!_goldenTestSetUpRegistered) {
+    _goldenTestSetUpRegistered = true;
+    goldenTestAdapter.setUp(_setUpGoldenTests);
+  }
 
   await goldenTestAdapter.testWidgets(
     description,
diff --git a/test/src/golden_test_set_up_accumulation_test.dart b/test/src/golden_test_set_up_accumulation_test.dart
new file mode 100644
index 0000000..6d5aa59
--- /dev/null
+++ b/test/src/golden_test_set_up_accumulation_test.dart
@@ -0,0 +1,164 @@
+import 'package:alchemist/alchemist.dart';
+import 'package:alchemist/src/golden_test_adapter.dart';
+import 'package:alchemist/src/golden_test_runner.dart';
+import 'package:flutter/material.dart';
+import 'package:flutter_test/flutter_test.dart';
+import 'package:mocktail/mocktail.dart';
+
+class MockGoldenTestRunner extends Mock implements GoldenTestRunner {}
+
+class MockWidgetTester extends Mock implements WidgetTester {}
+
+void main() {
+  setUpAll(() {
+    registerFallbackValue(MockWidgetTester());
+    registerFallbackValue(const SizedBox());
+    registerFallbackValue(const BoxConstraints());
+  });
+
+  group('goldenTest setUp accumulation', () {
+    // Stores every callback passed to setUp during test registration.
+    late List<ValueGetter<dynamic>> registeredSetUpCallbacks;
+
+    // Stores every (variant, callback) pair passed to testWidgets.
+    late List<(TestVariant<Object?>, Future<void> Function(WidgetTester))>
+        registeredTests;
+
+    late MockGoldenTestRunner runner;
+
+    setUp(() {
+      registeredSetUpCallbacks = [];
+      registeredTests = [];
+      runner = MockGoldenTestRunner();
+
+      // Use the real adapter so goldenTest → adapter.setUp → setUpFn path
+      // is exercised.
+      goldenTestAdapter = const FlutterGoldenTestAdapter();
+      goldenTestRunner = runner;
+      hostPlatform = HostPlatform.linux;
+
+      // Intercept setUp: record each callback without registering it in
+      // flutter_test's real infrastructure (which would pollute this test).
+      setUpFn = (body) {
+        registeredSetUpCallbacks.add(body);
+      };
+
+      // Intercept testWidgets: record the test but don't execute it yet.
+      testWidgetsFn = (
+        String description,
+        Future<void> Function(WidgetTester) callback, {
+        bool? skip,
+        Timeout? timeout,
+        bool semanticsEnabled = true,
+        TestVariant<Object?> variant = const DefaultTestVariant(),
+        dynamic tags,
+        int? retry,
+      }) {
+        registeredTests.add((variant, callback));
+      };
+
+      when(
+        () => runner.run(
+          tester: any(named: 'tester'),
+          goldenPath: any(named: 'goldenPath'),
+          widget: any(named: 'widget'),
+          globalConfigTheme: any(named: 'globalConfigTheme'),
+          variantConfigTheme: any(named: 'variantConfigTheme'),
+          goldenTestTheme: any(named: 'goldenTestTheme'),
+          forceUpdate: any(named: 'forceUpdate'),
+          obscureText: any(named: 'obscureText'),
+          renderShadows: any(named: 'renderShadows'),
+          textScaleFactor: any(named: 'textScaleFactor'),
+          constraints: any(named: 'constraints'),
+          pumpBeforeTest: any(named: 'pumpBeforeTest'),
+          pumpWidget: any(named: 'pumpWidget'),
+          whilePerforming: any(named: 'whilePerforming'),
+        ),
+      ).thenAnswer((_) async {});
+    });
+
+    tearDown(() {
+      goldenTestAdapter = defaultGoldenTestAdapter;
+      goldenTestRunner = defaultGoldenTestRunner;
+      hostPlatform = defaultHostPlatform;
+      setUpFn = defaultSetUpFn;
+      testWidgetsFn = defaultTestWidgetsFn;
+    });
+
+    test(
+      'each goldenTest call registers a new setUp callback '
+      '(should register at most one)',
+      () async {
+        const n = 5;
+        for (var i = 0; i < n; i++) {
+          await goldenTest(
+            'test $i',
+            fileName: 'test_$i',
+            builder: () => const SizedBox(),
+          );
+        }
+
+        // BUG: every goldenTest() call appends another setUp callback.
+        // All of them point to _setUpGoldenTests, so they are redundant.
+        //
+        // EXPECTED (after fix): at most 1 setUp registration.
+        expect(
+          registeredSetUpCallbacks,
+          hasLength(n), // currently 5 — should be ≤ 1
+          reason: 'goldenTest registers a duplicate setUp on every call',
+        );
+      },
+    );
+
+    test(
+      'accumulated setUps cause O(N²) executions across all test runs',
+      () async {
+        const n = 10;
+        for (var i = 0; i < n; i++) {
+          await goldenTest(
+            'test $i',
+            fileName: 'test_$i',
+            builder: () => const SizedBox(),
+          );
+        }
+
+        // --- Simulate what flutter_test does at runtime ---
+        //
+        // flutter_test runs every registered setUp callback before each
+        // test invocation.  With TestVariant producing V values per test,
+        // each test runs V times.
+        //
+        // Total setUp body executions =
+        //   (# tests) × (# variant values per test) × (# setUp callbacks)
+
+        var totalSetUpExecutions = 0;
+        for (final (variant, _) in registeredTests) {
+          final variantCount = variant.values.length;
+          for (var v = 0; v < variantCount; v++) {
+            // Before each variant run flutter_test executes ALL registered
+            // setUp callbacks.
+            totalSetUpExecutions += registeredSetUpCallbacks.length;
+          }
+        }
+
+        // With default AlchemistConfig both platform and CI are enabled,
+        // so each test has 2 variant values.
+        //
+        // Registered setUps  : N       (should be 1)
+        // Registered tests   : N
+        // Variants per test  : 2
+        // Total setUp calls  : N × N × 2 = 2N²
+        //
+        // For N = 10 this is 200.  For N = 100 this is 20 000.
+        expect(registeredTests, hasLength(n));
+        expect(
+          totalSetUpExecutions,
+          2 * n * n, // 200 — should be 2 * n (i.e., 20)
+          reason:
+              'setUp accumulation causes O(N²) executions '
+              '($totalSetUpExecutions instead of ${2 * n})',
+        );
+      },
+    );
+  });
+}

From 37f48660022095246fef467049a48d3954d9eeb7 Mon Sep 17 00:00:00 2001
From: James O'Leary <jpohhhh@gmail.com>
Date: Sun, 1 Mar 2026 14:51:01 -0800
Subject: [PATCH 2/2] =?UTF-8?q?test:=20update=20regression=20test=20to=20a?=
 =?UTF-8?q?ssert=20fix=20behavior=20(O(N)=20not=20O(N=C2=B2))?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../golden_test_set_up_accumulation_test.dart | 43 +++++--------------
 1 file changed, 11 insertions(+), 32 deletions(-)

diff --git a/test/src/golden_test_set_up_accumulation_test.dart b/test/src/golden_test_set_up_accumulation_test.dart
index 6d5aa59..92ee5a5 100644
--- a/test/src/golden_test_set_up_accumulation_test.dart
+++ b/test/src/golden_test_set_up_accumulation_test.dart
@@ -86,10 +86,9 @@ void main() {
     });
 
     test(
-      'each goldenTest call registers a new setUp callback '
-      '(should register at most one)',
+      'setUp is registered once and executions scale O(N) not O(N²)',
       () async {
-        const n = 5;
+        const n = 10;
         for (var i = 0; i < n; i++) {
           await goldenTest(
             'test $i',
@@ -98,29 +97,13 @@ void main() {
           );
         }
 
-        // BUG: every goldenTest() call appends another setUp callback.
-        // All of them point to _setUpGoldenTests, so they are redundant.
-        //
-        // EXPECTED (after fix): at most 1 setUp registration.
+        // FIXED: only one setUp callback should be registered regardless
+        // of how many goldenTest() calls are made.
         expect(
           registeredSetUpCallbacks,
-          hasLength(n), // currently 5 — should be ≤ 1
-          reason: 'goldenTest registers a duplicate setUp on every call',
+          hasLength(1),
+          reason: 'goldenTest should register setUp at most once',
         );
-      },
-    );
-
-    test(
-      'accumulated setUps cause O(N²) executions across all test runs',
-      () async {
-        const n = 10;
-        for (var i = 0; i < n; i++) {
-          await goldenTest(
-            'test $i',
-            fileName: 'test_$i',
-            builder: () => const SizedBox(),
-          );
-        }
 
         // --- Simulate what flutter_test does at runtime ---
         //
@@ -135,8 +118,6 @@ void main() {
         for (final (variant, _) in registeredTests) {
           final variantCount = variant.values.length;
           for (var v = 0; v < variantCount; v++) {
-            // Before each variant run flutter_test executes ALL registered
-            // setUp callbacks.
             totalSetUpExecutions += registeredSetUpCallbacks.length;
           }
         }
@@ -144,19 +125,17 @@ void main() {
         // With default AlchemistConfig both platform and CI are enabled,
         // so each test has 2 variant values.
         //
-        // Registered setUps  : N       (should be 1)
+        // Registered setUps  : 1
         // Registered tests   : N
         // Variants per test  : 2
-        // Total setUp calls  : N × N × 2 = 2N²
-        //
-        // For N = 10 this is 200.  For N = 100 this is 20 000.
+        // Total setUp calls  : N × 1 × 2 = 2N
         expect(registeredTests, hasLength(n));
         expect(
           totalSetUpExecutions,
-          2 * n * n, // 200 — should be 2 * n (i.e., 20)
+          2 * n, // 20 — linear, not quadratic
           reason:
-              'setUp accumulation causes O(N²) executions '
-              '($totalSetUpExecutions instead of ${2 * n})',
+              'setUp should execute O(N) times '
+              '($totalSetUpExecutions instead of expected ${2 * n})',
         );
       },
     );