Skip to content

Commit 6298d76

Browse files
committed
Implement no-regression reliability
1 parent 55b5c01 commit 6298d76

3 files changed

Lines changed: 221 additions & 9 deletions

File tree

README.md

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,15 @@ Runs the benchmark suite and produces Markdown/JSON output in `bench/results/`:
177177
zig build bench-compare
178178
```
179179

180+
`run-benchmarks` now auto-refreshes the snapshot block below from `bench/results/latest.json`.
181+
180182
The benchmark harness includes a “fastest vs lol-html” parse-throughput gate in the `stable` profile. See `bench/README.md` for details.
181183

182-
### Latest Stable Snapshot
184+
### Latest Benchmark Snapshot
185+
186+
<!-- BENCHMARK_SNAPSHOT:START -->
183187

184-
Source: `bench/results/latest.md` (`stable` profile).
188+
Source: `bench/results/latest.json` (`stable` profile).
185189

186190
#### Parse Throughput Comparison (MB/s)
187191

@@ -193,24 +197,32 @@ Source: `bench/results/latest.md` (`stable` profile).
193197
| `w3-html52.html` | 973.60 | 884.27 | 747.58 | 199.74 |
194198
| `hn.html` | 1429.47 | 1280.50 | 864.01 | 225.05 |
195199

196-
#### Query Throughput (ours only)
200+
#### Query Match Throughput (ours)
201+
202+
| Case | strictest ops/s | strictest ns/op | fastest ops/s | fastest ns/op |
203+
|---|---:|---:|---:|---:|
204+
| `attr-heavy-button` | 145652845.18 | 6.87 | 145812129.82 | 6.86 |
205+
| `attr-heavy-nav` | 143301582.48 | 6.98 | 144100316.88 | 6.94 |
206+
207+
#### Cached Query Throughput (ours)
197208

198-
| Case | Runtime query-match (ops/s) | Runtime query-match (ns/op) | Compiled query (ops/s) | Compiled query (ns/op) |
209+
| Case | strictest ops/s | strictest ns/op | fastest ops/s | fastest ns/op |
199210
|---|---:|---:|---:|---:|
200-
| `attr-heavy-button` | 145,812,129.82 | 6.86 | 214,695,936.88 | 4.66 |
201-
| `attr-heavy-nav` | 144,100,316.88 | 6.94 | 211,891,791.10 | 4.72 |
211+
| `attr-heavy-button` | 214695936.88 | 4.66 | 212517267.03 | 4.71 |
212+
| `attr-heavy-nav` | 211891791.10 | 4.72 | 203910597.44 | 4.90 |
202213

203214
#### Query Parse Throughput (ours)
204215

205216
| Selector case | Ops/s | ns/op |
206217
|---|---:|---:|
207-
| `simple` | 19,273,218.49 | 51.89 |
208-
| `complex` | 6,549,192.38 | 152.69 |
209-
| `grouped` | 7,544,814.12 | 132.54 |
218+
| `simple` | 19273218.49 | 51.89 |
219+
| `complex` | 6549192.38 | 152.69 |
220+
| `grouped` | 7544814.12 | 132.54 |
210221

211222
For full per-parser, per-fixture tables and gate output:
212223
- `bench/results/latest.md`
213224
- `bench/results/latest.json`
225+
<!-- BENCHMARK_SNAPSHOT:END -->
214226

215227
## Conformance
216228

bench/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ zig build tools -- run-benchmarks --profile stable
4848
zig build tools -- run-benchmarks --profile stable --write-baseline
4949
# compare against explicit baseline file
5050
zig build tools -- run-benchmarks --profile stable --baseline bench/results/baseline_stable.json
51+
# refresh README benchmark snapshot from existing latest.json
52+
zig build tools -- sync-readme-bench
5153
```
5254

5355
Or run the full setup + comparison from Zig build:
@@ -61,6 +63,8 @@ Results are written to:
6163
- `bench/results/latest.json`
6264
- `bench/results/latest.md`
6365

66+
`run-benchmarks` also updates the `README.md` benchmark snapshot block from `bench/results/latest.json`.
67+
6468
The benchmark output also includes a hard gate table:
6569

6670
- `PASS/FAIL: ours-fastest > lol-html` per fixture

src/tools/scripts.zig

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ const repeats: usize = 5;
1616
const StableParseMinRatio: f64 = 0.99;
1717
const StableQueryMinRatio: f64 = 0.99;
1818
const RegressionConfirmRuns: usize = 3;
19+
// HTML comment that opens the generated benchmark snapshot block in README.md.
// `updateReadmeBenchmarkSnapshot` keeps everything up to and including this marker.
const ReadmeBenchmarkStartMarker = "<!-- BENCHMARK_SNAPSHOT:START -->";
20+
// HTML comment that closes the generated benchmark snapshot block in README.md.
// Text between the two markers is replaced on every sync; the marker itself is kept.
const ReadmeBenchmarkEndMarker = "<!-- BENCHMARK_SNAPSHOT:END -->";
1921

2022
const ParserCapability = struct {
2123
parser: []const u8,
@@ -288,6 +290,27 @@ const GateRow = struct {
288290
pass: bool,
289291
};
290292

293+
/// One parse-throughput row as decoded from `bench/results/latest.json`
/// for rendering the README snapshot.
const ReadmeParseResult = struct {
    /// Parser label the row belongs to (the renderer looks up labels such as
    /// `ours-fastest`, `ours-strictest`, `lol-html` and `lexbor`).
    parser: []const u8,
    /// Fixture name the measurement was taken on.
    fixture: []const u8,
    /// Throughput value shown in the "Parse Throughput Comparison (MB/s)" table.
    throughput_mb_s: f64,
};
298+
299+
/// One query benchmark row as decoded from `bench/results/latest.json`.
/// The same shape is used for the query-parse, query-match and
/// compiled-query result lists.
const ReadmeQueryResult = struct {
    /// Parser label the row belongs to (e.g. `ours-strictest`, `ours-fastest`).
    parser: []const u8,
    /// Benchmark case name; used as the row key in the rendered tables.
    case: []const u8,
    /// Value rendered in the "ops/s" column.
    ops_s: f64,
    /// Value rendered in the "ns/op" column.
    ns_per_op: f64,
};
305+
306+
/// The subset of `bench/results/latest.json` needed to render the README
/// benchmark snapshot. It is decoded with `ignore_unknown_fields = true`,
/// so any other keys in the JSON document are skipped.
const ReadmeBenchSnapshot = struct {
    /// Benchmark profile name, echoed in the "Source:" line of the snapshot.
    profile: []const u8,
    /// Rows for the parse throughput comparison table.
    parse_results: []const ReadmeParseResult,
    /// Rows for the query parse throughput table.
    query_parse_results: []const ReadmeQueryResult,
    /// Rows for the query match throughput table.
    query_match_results: []const ReadmeQueryResult,
    /// Rows for the cached (compiled) query throughput table.
    query_compiled_results: []const ReadmeQueryResult,
};
313+
291314
fn runnerCmdParse(alloc: std.mem.Allocator, parser_name: []const u8, fixture: []const u8, iterations: usize) ![]const []const u8 {
292315
const iter_s = try std.fmt.allocPrint(alloc, "{d}", .{iterations});
293316
if (std.mem.eql(u8, parser_name, "ours-strictest")) {
@@ -477,6 +500,172 @@ fn findParseThroughput(rows: []const ParseResult, parser_name: []const u8, fixtu
477500
return null;
478501
}
479502

503+
/// Returns the throughput of the first row whose parser and fixture both
/// equal the given names, or `null` when no such row exists.
fn findReadmeParseThroughput(rows: []const ReadmeParseResult, parser_name: []const u8, fixture_name: []const u8) ?f64 {
    var idx: usize = 0;
    while (idx < rows.len) : (idx += 1) {
        const candidate = rows[idx];
        if (!std.mem.eql(u8, candidate.parser, parser_name)) continue;
        if (!std.mem.eql(u8, candidate.fixture, fixture_name)) continue;
        return candidate.throughput_mb_s;
    }
    return null;
}
511+
512+
/// Returns a copy of the first row whose parser and case both equal the
/// given names, or `null` when no such row exists.
fn findReadmeQuery(rows: []const ReadmeQueryResult, parser_name: []const u8, case_name: []const u8) ?ReadmeQueryResult {
    var idx: usize = 0;
    while (idx < rows.len) : (idx += 1) {
        const candidate = rows[idx];
        const same_parser = std.mem.eql(u8, candidate.parser, parser_name);
        if (same_parser and std.mem.eql(u8, candidate.case, case_name)) {
            return candidate;
        }
    }
    return null;
}
518+
519+
/// Appends `value` to `list` unless a byte-equal string is already present.
/// The slice is stored as-is (not copied), so its backing memory must
/// outlive `list`. Fails only if growing the list fails.
fn appendUniqueString(list: *std.ArrayList([]const u8), alloc: std.mem.Allocator, value: []const u8) !void {
    const already_present = for (list.items) |existing| {
        if (std.mem.eql(u8, existing, value)) break true;
    } else false;

    if (already_present) return;
    try list.append(alloc, value);
}
525+
526+
/// Writes `value` with two decimal places, or a single `-` placeholder
/// when the value is absent. `w` must provide `print` and `writeAll`.
fn writeMaybeF64(w: anytype, value: ?f64) !void {
    const number = value orelse {
        try w.writeAll("-");
        return;
    };
    try w.print("{d:.2}", .{number});
}
533+
534+
/// Appends the distinct `case` names found in `rows` to `cases`, keeping
/// first-seen order. The stored slices borrow from `rows`.
fn collectReadmeQueryCases(cases: *std.ArrayList([]const u8), alloc: std.mem.Allocator, rows: []const ReadmeQueryResult) !void {
    for (rows) |row| {
        try appendUniqueString(cases, alloc, row.case);
    }
}

/// Writes one "strictest vs fastest" query table: a level-4 heading with
/// `title`, the header rows, then one row per entry of `cases` looked up in
/// `rows`. Missing measurements are rendered as `-`.
fn writeReadmeQueryTable(w: anytype, title: []const u8, rows: []const ReadmeQueryResult, cases: []const []const u8) !void {
    try w.print("\n#### {s}\n\n", .{title});
    try w.writeAll("| Case | strictest ops/s | strictest ns/op | fastest ops/s | fastest ns/op |\n");
    try w.writeAll("|---|---:|---:|---:|---:|\n");
    for (cases) |case_name| {
        const strictest = findReadmeQuery(rows, "ours-strictest", case_name);
        const fastest = findReadmeQuery(rows, "ours-fastest", case_name);
        try w.print("| `{s}` | ", .{case_name});
        try writeMaybeF64(w, if (strictest) |s| s.ops_s else null);
        try w.writeAll(" | ");
        try writeMaybeF64(w, if (strictest) |s| s.ns_per_op else null);
        try w.writeAll(" | ");
        try writeMaybeF64(w, if (fastest) |s| s.ops_s else null);
        try w.writeAll(" | ");
        try writeMaybeF64(w, if (fastest) |s| s.ns_per_op else null);
        try w.writeAll(" |\n");
    }
}

/// Renders the Markdown that goes between the README benchmark snapshot
/// markers: a "Source:" line, the parse throughput comparison, the query
/// match and cached query tables, the query parse table, and links to the
/// full result files.
///
/// Rows appear in the order their fixture/case is first seen in `snap`.
/// Returns an allocated slice; the caller owns it and frees it with `alloc`.
fn renderReadmeBenchmarkSection(alloc: std.mem.Allocator, snap: ReadmeBenchSnapshot) ![]u8 {
    var out = std.ArrayList(u8).empty;
    errdefer out.deinit(alloc);
    const w = out.writer(alloc);

    // Row keys for each table. The lists only borrow strings owned by `snap`.
    var fixtures = std.ArrayList([]const u8).empty;
    defer fixtures.deinit(alloc);
    for (snap.parse_results) |row| {
        try appendUniqueString(&fixtures, alloc, row.fixture);
    }

    var query_match_cases = std.ArrayList([]const u8).empty;
    defer query_match_cases.deinit(alloc);
    try collectReadmeQueryCases(&query_match_cases, alloc, snap.query_match_results);

    // The cached-query table gets its own key list: previously it reused the
    // query-match cases, which dropped compiled-only cases and produced
    // all-`-` rows for match-only cases.
    var query_compiled_cases = std.ArrayList([]const u8).empty;
    defer query_compiled_cases.deinit(alloc);
    try collectReadmeQueryCases(&query_compiled_cases, alloc, snap.query_compiled_results);

    var query_parse_cases = std.ArrayList([]const u8).empty;
    defer query_parse_cases.deinit(alloc);
    try collectReadmeQueryCases(&query_parse_cases, alloc, snap.query_parse_results);

    try w.print("Source: `bench/results/latest.json` (`{s}` profile).\n\n", .{snap.profile});

    try w.writeAll("#### Parse Throughput Comparison (MB/s)\n\n");
    try w.writeAll("| Fixture | ours-fastest | ours-strictest | lol-html | lexbor |\n");
    try w.writeAll("|---|---:|---:|---:|---:|\n");
    // Column order must match the header row above.
    const parse_columns = [_][]const u8{ "ours-fastest", "ours-strictest", "lol-html", "lexbor" };
    for (fixtures.items) |fixture| {
        try w.print("| `{s}` |", .{fixture});
        for (parse_columns) |parser_name| {
            try w.writeAll(" ");
            try writeMaybeF64(w, findReadmeParseThroughput(snap.parse_results, parser_name, fixture));
            try w.writeAll(" |");
        }
        try w.writeAll("\n");
    }

    try writeReadmeQueryTable(w, "Query Match Throughput (ours)", snap.query_match_results, query_match_cases.items);
    try writeReadmeQueryTable(w, "Cached Query Throughput (ours)", snap.query_compiled_results, query_compiled_cases.items);

    try w.writeAll("\n#### Query Parse Throughput (ours)\n\n");
    try w.writeAll("| Selector case | Ops/s | ns/op |\n");
    try w.writeAll("|---|---:|---:|\n");
    for (query_parse_cases.items) |case_name| {
        // Query-parse rows may be recorded under any of these parser labels;
        // take the first label that has a row for this case.
        const ours = findReadmeQuery(snap.query_parse_results, "ours", case_name) orelse
            findReadmeQuery(snap.query_parse_results, "ours-strictest", case_name) orelse
            findReadmeQuery(snap.query_parse_results, "ours-fastest", case_name);
        try w.print("| `{s}` | ", .{case_name});
        try writeMaybeF64(w, if (ours) |r| r.ops_s else null);
        try w.writeAll(" | ");
        try writeMaybeF64(w, if (ours) |r| r.ns_per_op else null);
        try w.writeAll(" |\n");
    }

    try w.writeAll("\nFor full per-parser, per-fixture tables and gate output:\n");
    try w.writeAll("- `bench/results/latest.md`\n");
    try w.writeAll("- `bench/results/latest.json`\n");

    return out.toOwnedSlice(alloc);
}
628+
629+
/// Regenerates the benchmark snapshot block of `README.md` from
/// `bench/results/latest.json`.
///
/// The text between `ReadmeBenchmarkStartMarker` and `ReadmeBenchmarkEndMarker`
/// is replaced with freshly rendered Markdown; both markers and everything
/// outside them are kept. The file is only written when the content changes.
///
/// Errors: `error.ReadmeBenchMarkersMissing` when either marker is absent (or
/// the end marker does not follow the start marker), plus any error from
/// reading/writing the files, decoding the JSON, or allocating.
fn updateReadmeBenchmarkSnapshot(alloc: std.mem.Allocator) !void {
    const latest_json = try common.readFileAlloc(alloc, "bench/results/latest.json");
    defer alloc.free(latest_json);

    // Only the fields declared on ReadmeBenchSnapshot are decoded; the rest
    // of the results document is ignored.
    const parsed = try std.json.parseFromSlice(ReadmeBenchSnapshot, alloc, latest_json, .{
        .ignore_unknown_fields = true,
    });
    defer parsed.deinit();

    const replacement = try renderReadmeBenchmarkSection(alloc, parsed.value);
    defer alloc.free(replacement);

    const readme = try common.readFileAlloc(alloc, "README.md");
    defer alloc.free(readme);

    const start = std.mem.indexOf(u8, readme, ReadmeBenchmarkStartMarker) orelse return error.ReadmeBenchMarkersMissing;
    const after_start = start + ReadmeBenchmarkStartMarker.len;
    // Search for the end marker only after the start marker so a stray
    // earlier occurrence cannot produce an inverted range.
    const end = std.mem.indexOfPos(u8, readme, after_start, ReadmeBenchmarkEndMarker) orelse return error.ReadmeBenchMarkersMissing;

    var out = std.ArrayList(u8).empty;
    defer out.deinit(alloc);
    try out.appendSlice(alloc, readme[0..after_start]);
    try out.appendSlice(alloc, "\n\n");
    try out.appendSlice(alloc, replacement);
    // Make sure the end marker starts on its own line. This is decided from
    // the generated text alone: consulting the old README's byte before the
    // marker added a spurious blank line on the first sync and made the
    // following sync rewrite the file again.
    if (replacement.len == 0 or replacement[replacement.len - 1] != '\n') {
        try out.append(alloc, '\n');
    }
    try out.appendSlice(alloc, readme[end..]);

    if (!std.mem.eql(u8, out.items, readme)) {
        try common.writeFile("README.md", out.items);
        std.debug.print("wrote README.md benchmark snapshot\n", .{});
    } else {
        std.debug.print("README.md benchmark snapshot already up-to-date\n", .{});
    }
}
668+
480669
fn writeMarkdown(
481670
alloc: std.mem.Allocator,
482671
profile_name: []const u8,
@@ -1015,6 +1204,7 @@ fn runBenchmarks(alloc: std.mem.Allocator, args: []const []const u8) !void {
10151204
const md = try writeMarkdown(alloc, profile.name, parse_results.items, query_parse_results.items, query_match_results.items, query_compiled_results.items, gate_rows);
10161205
defer alloc.free(md);
10171206
try common.writeFile("bench/results/latest.md", md);
1207+
try updateReadmeBenchmarkSnapshot(alloc);
10181208

10191209
// Optional baseline behavior.
10201210
const baseline_default = try std.fmt.allocPrint(alloc, "bench/results/baseline_{s}.json", .{profile.name});
@@ -1948,6 +2138,7 @@ fn usage() void {
19482138
\\ htmlparser-tools setup-parsers
19492139
\\ htmlparser-tools setup-fixtures [--refresh]
19502140
\\ htmlparser-tools run-benchmarks [--profile quick|stable] [--baseline path] [--write-baseline]
2141+
\\ htmlparser-tools sync-readme-bench
19512142
\\ htmlparser-tools run-external-suites [--mode strictest|fastest|both] [--max-html5lib-cases N] [--json-out path]
19522143
\\ htmlparser-tools docs-check
19532144
\\ htmlparser-tools examples-check
@@ -1987,6 +2178,11 @@ pub fn main() !void {
19872178
try runBenchmarks(alloc, rest);
19882179
return;
19892180
}
2181+
if (std.mem.eql(u8, cmd, "sync-readme-bench")) {
2182+
if (rest.len != 0) return error.InvalidArgument;
2183+
try updateReadmeBenchmarkSnapshot(alloc);
2184+
return;
2185+
}
19902186
if (std.mem.eql(u8, cmd, "run-external-suites")) {
19912187
try runExternalSuites(alloc, rest);
19922188
return;

0 commit comments

Comments
 (0)