Skip to content

Commit 3ef6abe

Browse files
authored
Merge pull request #1 from SmallThingz/dev
Dev
2 parents 6709cc7 + ca35b5a commit 3ef6abe

39 files changed

+2534
-1678
lines changed

DOCUMENTATION.md

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ All examples are verified by running `zig build examples-check`
8383
- `lastChild()`
8484
- `nextSibling()`
8585
- `prevSibling()`
86-
- `children()` (borrowed `[]const u32` index view)
86+
- `children()` (iterator of wrapped child nodes; `collect(allocator)` returns an owned `[]Node`)
8787
- Text:
8888
- `innerText(allocator)` (borrowed or allocated depending on shape)
8989
- `innerTextWithOptions(allocator, TextOptions)`
@@ -193,49 +193,49 @@ Source: `bench/results/latest.json` (`stable` profile).
193193

194194
| Fixture | ours | lol-html | lexbor |
195195
|---|---:|---:|---:|
196-
| `rust-lang.html` | 1447.99 | 1474.65 | 332.72 |
197-
| `wiki-html.html` | 1645.45 | 1215.04 | 271.24 |
198-
| `mdn-html.html` | 2570.09 | 1879.00 | 404.50 |
199-
| `w3-html52.html` | 1064.19 | 764.62 | 199.22 |
200-
| `hn.html` | 1263.60 | 885.26 | 223.15 |
201-
| `python-org.html` | 1549.02 | 1356.21 | 284.19 |
202-
| `kernel-org.html` | 1440.47 | 1300.81 | 276.52 |
203-
| `gnu-org.html` | 1917.36 | 1482.15 | 317.74 |
204-
| `ziglang-org.html` | 1480.49 | 1257.62 | 291.72 |
205-
| `ziglang-doc-master.html` | 1122.44 | 987.16 | 214.23 |
206-
| `wikipedia-unicode-list.html` | 1247.00 | 1024.98 | 215.21 |
207-
| `whatwg-html-spec.html` | 1113.73 | 841.16 | 210.83 |
208-
| `synthetic-forms.html` | 1046.17 | 710.72 | 174.94 |
209-
| `synthetic-table-grid.html` | 768.56 | 622.31 | 152.86 |
210-
| `synthetic-list-nested.html` | 833.77 | 598.02 | 152.45 |
211-
| `synthetic-comments-doctype.html` | 1200.72 | 827.66 | 212.09 |
212-
| `synthetic-template-rich.html` | 628.02 | 444.34 | 134.10 |
213-
| `synthetic-whitespace-noise.html` | 1104.21 | 919.69 | 170.33 |
214-
| `synthetic-news-feed.html` | 835.27 | 577.95 | 144.46 |
215-
| `synthetic-ecommerce.html` | 787.72 | 556.51 | 151.95 |
216-
| `synthetic-forum-thread.html` | 839.48 | 579.84 | 143.06 |
196+
| `rust-lang.html` | 2132.70 | 1501.81 | 324.53 |
197+
| `wiki-html.html` | 1991.85 | 1220.28 | 267.50 |
198+
| `mdn-html.html` | 2939.75 | 1894.70 | 408.76 |
199+
| `w3-html52.html` | 956.25 | 754.98 | 196.30 |
200+
| `hn.html` | 1595.94 | 893.48 | 221.75 |
201+
| `python-org.html` | 2116.13 | 1354.79 | 280.79 |
202+
| `kernel-org.html` | 1979.68 | 1335.72 | 290.65 |
203+
| `gnu-org.html` | 2368.06 | 1490.41 | 313.42 |
204+
| `ziglang-org.html` | 1874.52 | 1299.04 | 289.84 |
205+
| `ziglang-doc-master.html` | 1431.90 | 1045.18 | 225.11 |
206+
| `wikipedia-unicode-list.html` | 1647.71 | 1081.56 | 226.67 |
207+
| `whatwg-html-spec.html` | 1344.31 | 892.66 | 219.37 |
208+
| `synthetic-forms.html` | 1396.29 | 781.68 | 189.28 |
209+
| `synthetic-table-grid.html` | 1086.37 | 724.33 | 169.46 |
210+
| `synthetic-list-nested.html` | 1190.11 | 652.22 | 161.58 |
211+
| `synthetic-comments-doctype.html` | 1862.19 | 943.51 | 223.61 |
212+
| `synthetic-template-rich.html` | 913.72 | 465.79 | 143.20 |
213+
| `synthetic-whitespace-noise.html` | 1592.74 | 1062.32 | 189.70 |
214+
| `synthetic-news-feed.html` | 1179.12 | 646.78 | 157.61 |
215+
| `synthetic-ecommerce.html` | 1134.48 | 637.67 | 163.21 |
216+
| `synthetic-forum-thread.html` | 1145.80 | 643.29 | 161.14 |
217217

218218
#### Query Match Throughput (ours)
219219

220220
| Case | ours ops/s | ours ns/op |
221221
|---|---:|---:|
222-
| `attr-heavy-button` | 1148936.76 | 870.37 |
223-
| `attr-heavy-nav` | 1130790.00 | 884.34 |
222+
| `attr-heavy-button` | 191877.12 | 5211.67 |
223+
| `attr-heavy-nav` | 87885.90 | 11378.39 |
224224

225225
#### Cached Query Throughput (ours)
226226

227227
| Case | ours ops/s | ours ns/op |
228228
|---|---:|---:|
229-
| `attr-heavy-button` | 1305257.78 | 766.13 |
230-
| `attr-heavy-nav` | 1347173.46 | 742.29 |
229+
| `attr-heavy-button` | 144352.80 | 6927.47 |
230+
| `attr-heavy-nav` | 120324.49 | 8310.86 |
231231

232232
#### Query Parse Throughput (ours)
233233

234234
| Selector case | Ops/s | ns/op |
235235
|---|---:|---:|
236-
| `simple` | 17335919.85 | 57.68 |
237-
| `complex` | 5836657.49 | 171.33 |
238-
| `grouped` | 6396371.26 | 156.34 |
236+
| `simple` | 10465915.34 | 95.55 |
237+
| `complex` | 4910511.63 | 203.64 |
238+
| `grouped` | 6290595.08 | 158.97 |
239239

240240
For full per-parser, per-fixture tables and gate output:
241241
- `bench/results/latest.md`

README.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,9 @@ Source: `bench/results/latest.json` (`stable` profile).
2525
### Parse Throughput (Average Across Fixtures)
2626

2727
```text
28-
ours │████████████████████│ 1233.61 MB/s (100.00%)
29-
lol-html │████████████████░░░░│ 966.94 MB/s (78.38%)
30-
lexbor │███░░░░░░░░░░░░░░░░│ 222.74 MB/s (18.06%)
28+
ours │████████████████████│ 1613.31 MB/s (100.00%)
29+
lol-html │█████████████░░░░░░░│ 1015.34 MB/s (62.94%)
30+
lexbor │███░░░░░░░░░░░░░░░░░│ 229.69 MB/s (14.24%)
3131
```
3232

3333
### Conformance Snapshot

build.zig

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,15 @@ pub fn build(b: *std.Build) void {
2222
}),
2323
});
2424

25+
const parse_mode_mod = b.createModule(.{
26+
.root_source_file = b.path("tools/parse_mode.zig"),
27+
.target = target,
28+
.optimize = optimize,
29+
.imports = &.{
30+
.{ .name = "htmlparser", .module = mod },
31+
},
32+
});
33+
2534
const bench_exe = b.addExecutable(.{
2635
.name = "htmlparser-bench",
2736
.root_module = b.createModule(.{
@@ -30,6 +39,7 @@ pub fn build(b: *std.Build) void {
3039
.optimize = optimize,
3140
.imports = &.{
3241
.{ .name = "htmlparser", .module = mod },
42+
.{ .name = "parse_mode", .module = parse_mode_mod },
3343
},
3444
}),
3545
});
@@ -111,11 +121,13 @@ pub fn build(b: *std.Build) void {
111121

112122
const mod_tests = b.addTest(.{
113123
.root_module = mod,
124+
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
114125
});
115126
const run_mod_tests = b.addRunArtifact(mod_tests);
116127

117128
const exe_tests = b.addTest(.{
118129
.root_module = exe.root_module,
130+
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
119131
});
120132
const run_exe_tests = b.addRunArtifact(exe_tests);
121133

@@ -126,8 +138,17 @@ pub fn build(b: *std.Build) void {
126138
.optimize = optimize,
127139
.imports = &.{
128140
.{ .name = "htmlparser", .module = mod },
141+
.{ .name = "examples", .module = b.createModule(.{
142+
.root_source_file = b.path("examples/examples.zig"),
143+
.target = target,
144+
.optimize = optimize,
145+
.imports = &.{
146+
.{ .name = "htmlparser", .module = mod },
147+
},
148+
}) },
129149
},
130150
}),
151+
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
131152
});
132153
const run_examples_tests = b.addRunArtifact(examples_tests);
133154

@@ -140,14 +161,41 @@ pub fn build(b: *std.Build) void {
140161
.{ .name = "htmlparser", .module = mod },
141162
},
142163
}),
164+
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
143165
});
144166
const run_behavioral_tests = b.addRunArtifact(behavioral_tests);
145167

168+
const scripts_tests = b.addTest(.{
169+
.root_module = b.createModule(.{
170+
.root_source_file = b.path("tools/scripts.zig"),
171+
.target = target,
172+
.optimize = optimize,
173+
}),
174+
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
175+
});
176+
const run_scripts_tests = b.addRunArtifact(scripts_tests);
177+
178+
const bench_tests = b.addTest(.{
179+
.root_module = b.createModule(.{
180+
.root_source_file = b.path("tools/bench/bench.zig"),
181+
.target = target,
182+
.optimize = optimize,
183+
.imports = &.{
184+
.{ .name = "htmlparser", .module = mod },
185+
.{ .name = "parse_mode", .module = parse_mode_mod },
186+
},
187+
}),
188+
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
189+
});
190+
const run_bench_tests = b.addRunArtifact(bench_tests);
191+
146192
const test_step = b.step("test", "Run tests");
147193
test_step.dependOn(&run_mod_tests.step);
148194
test_step.dependOn(&run_exe_tests.step);
149195
test_step.dependOn(&run_examples_tests.step);
150196
test_step.dependOn(&run_behavioral_tests.step);
197+
test_step.dependOn(&run_scripts_tests.step);
198+
test_step.dependOn(&run_bench_tests.step);
151199

152200
const ship_check_step = b.step("ship-check", "Run release-readiness checks (test + docs + examples)");
153201
ship_check_step.dependOn(test_step);

examples/basic_parse_query.zig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ const html = @import("htmlparser");
33
const default_options: html.ParseOptions = .{};
44
const Document = default_options.GetDocument();
55

6-
fn run() !void {
6+
pub fn run() !void {
77
var doc = Document.init(std.testing.allocator);
88
defer doc.deinit();
99

examples/cached_selector.zig

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ const html = @import("htmlparser");
33
const default_options: html.ParseOptions = .{};
44
const Document = default_options.GetDocument();
55

6-
fn run() !void {
6+
pub fn run() !void {
77
var doc = Document.init(std.testing.allocator);
88
defer doc.deinit();
99

@@ -20,7 +20,7 @@ fn run() !void {
2020
defer arena.deinit();
2121

2222
const sel = try html.Selector.compileRuntime(arena.allocator(), "a[href^=https][class~=button]");
23-
const first = doc.queryOneCached(&sel) orelse return error.TestUnexpectedResult;
23+
const first = doc.queryOneCached(sel) orelse return error.TestUnexpectedResult;
2424
try std.testing.expectEqualStrings("a1", first.getAttributeValue("id").?);
2525
}
2626

examples/debug_query_report.zig

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,19 +3,19 @@ const html = @import("htmlparser");
33
const default_options: html.ParseOptions = .{};
44
const Document = default_options.GetDocument();
55

6-
fn run() !void {
6+
pub fn run() !void {
77
var doc = Document.init(std.testing.allocator);
88
defer doc.deinit();
99

1010
var input = "<div><a id='one' class='nav'></a><a id='two'></a></div>".*;
1111
try doc.parse(&input, .{});
1212

13-
var report: html.QueryDebugReport = .{};
14-
const node = try doc.queryOneRuntimeDebug("a[href^=https]", &report);
15-
try std.testing.expect(node == null);
16-
try std.testing.expect(report.visited_elements > 0);
17-
try std.testing.expect(report.near_miss_len > 0);
18-
try std.testing.expect(report.near_misses[0].reason.kind != .none);
13+
const result = doc.queryOneRuntimeDebug("a[href^=https]");
14+
try std.testing.expect(result.err == null);
15+
try std.testing.expect(result.node == null);
16+
try std.testing.expect(result.report.visited_elements > 0);
17+
try std.testing.expect(result.report.near_miss_len > 0);
18+
try std.testing.expect(result.report.near_misses[0].reason.kind != .none);
1919
}
2020

2121
test "query debug report for selector mismatch" {

examples/examples.zig

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
pub const basic_parse_query = @import("basic_parse_query.zig");
2+
pub const runtime_selector = @import("runtime_selector.zig");
3+
pub const cached_selector = @import("cached_selector.zig");
4+
pub const navigation_and_children = @import("navigation_and_children.zig");
5+
pub const inner_text_options = @import("inner_text_options.zig");
6+
pub const strict_vs_fastest_parse = @import("strict_vs_fastest_parse.zig");
7+
pub const debug_query_report = @import("debug_query_report.zig");
8+
pub const instrumentation_hooks = @import("instrumentation_hooks.zig");
9+
pub const query_time_decode = @import("query_time_decode.zig");

examples/inner_text_options.zig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ const html = @import("htmlparser");
33
const default_options: html.ParseOptions = .{};
44
const Document = default_options.GetDocument();
55

6-
fn run() !void {
6+
pub fn run() !void {
77
var doc = Document.init(std.testing.allocator);
88
defer doc.deinit();
99

examples/instrumentation_hooks.zig

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,17 +26,17 @@ const Hooks = struct {
2626
}
2727
};
2828

29-
fn run() !void {
29+
pub fn run() !void {
3030
var doc = Document.init(std.testing.allocator);
3131
defer doc.deinit();
3232

3333
var hooks: Hooks = .{};
3434
var input = "<div><span id='x'></span></div>".*;
35-
try html.parseWithHooks(&doc, &input, .{}, &hooks);
35+
try html.parseWithHooks(std.testing.io, &doc, &input, .{}, &hooks);
3636
try std.testing.expectEqual(@as(usize, 1), hooks.parse_start_calls);
3737
try std.testing.expectEqual(@as(usize, 1), hooks.parse_end_calls);
3838

39-
_ = try html.queryOneRuntimeWithHooks(&doc, "span#x", &hooks);
39+
_ = try html.queryOneRuntimeWithHooks(std.testing.io, &doc, "span#x", &hooks);
4040
try std.testing.expectEqual(@as(usize, 1), hooks.query_start_calls);
4141
try std.testing.expectEqual(@as(usize, 1), hooks.query_end_calls);
4242
}

examples/navigation_and_children.zig

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ const html = @import("htmlparser");
33
const default_options: html.ParseOptions = .{};
44
const Document = default_options.GetDocument();
55

6-
fn run() !void {
6+
pub fn run() !void {
77
var doc = Document.init(std.testing.allocator);
88
defer doc.deinit();
99

@@ -18,13 +18,10 @@ fn run() !void {
1818
try std.testing.expectEqualStrings("body", last.getAttributeValue("id").?);
1919

2020
var children = main.children();
21-
var child_indexes: std.ArrayListUnmanaged(u32) = .{};
22-
defer child_indexes.deinit(std.testing.allocator);
23-
try children.collect(std.testing.allocator, &child_indexes);
24-
try std.testing.expectEqual(@as(usize, 3), child_indexes.items.len);
25-
const first_idx = child_indexes.items[0];
26-
const first_via_index = main.doc.nodeAt(first_idx) orelse return error.TestUnexpectedResult;
27-
try std.testing.expectEqualStrings("title", first_via_index.getAttributeValue("id").?);
21+
const child_nodes = try children.collect(std.testing.allocator);
22+
defer std.testing.allocator.free(child_nodes);
23+
try std.testing.expectEqual(@as(usize, 3), child_nodes.len);
24+
try std.testing.expectEqualStrings("title", child_nodes[0].getAttributeValue("id").?);
2825
}
2926

3027
test "navigation and children iterator" {

0 commit comments

Comments
 (0)