Skip to content

Commit 37b5575

Browse files
authored
Merge pull request #47 from softpudding/codex/highlight-page-pagination
Make highlight pagination independent of snapshot ids
2 parents 980f0bb + 4576941 commit 37b5575

26 files changed

+328
-812
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ repos:
1111
hooks:
1212
- id: black
1313
name: black
14-
entry: uv run black
14+
entry: uv run --extra dev black
1515
language: system
1616
types_or: [python, pyi]
1717
require_serial: true

AGENTS.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ Elements are paginated to ensure **no visual overlap** in each screenshot:
204204

205205
- `highlight_elements` now uses a **snapshot-first** readiness check instead of page-side polling loops.
206206
- Reason: OpenBrowser intentionally keeps automated tabs in the browser background, and Chrome may heavily throttle hidden-tab timers. A page-side `setTimeout` stability loop can therefore take far longer than its nominal budget and become the main cause of highlight timeouts.
207+
- In practice, the main cause of unstable first-highlight screenshots is often **missing warmup**, not a bad readiness classifier. A background tab may answer lightweight `Runtime.evaluate` probes while still sitting in a partially painted / partially decoded state.
208+
- A screenshot-style warmup is therefore the default precondition for `highlight_elements`. It helps force hidden-tab paint/compositor/image-decode work before interactive-element detection runs.
209+
- If `highlight_elements` keeps returning `not_ready` but `tab view` immediately makes the next highlight succeed, treat that as a warmup issue first.
207210
- The extension samples viewport readiness signals once per attempt: document readiness, viewport text/media density, pending images, and loading placeholders such as skeleton/shimmer/spinner indicators.
208211
- Readiness is graded as `ready`, `provisionally_ready`, or `not_ready`.
209212
- If readiness is `not_ready`, the extension performs only a couple of short **background-side** retries before proceeding or returning the latest result.
@@ -213,7 +216,7 @@ Elements are paginated to ensure **no visual overlap** in each screenshot:
213216
```
214217
# Highlight mixed elements first (default)
215218
highlight_elements() → Page 1 of any interactive elements
216-
highlight_elements(page=2) → Page 2 of the same any inventory
219+
highlight_elements(page=2) → Page 2 of the any results for the current page state
217220
highlight_elements(element_type="any", page=1) → Explicit any-first discovery
218221
219222
# Highlight other types (one at a time)
@@ -312,6 +315,8 @@ cd extension && npm run build
312315

313316
OpenBrowser has explicit screenshot control for maximum flexibility:
314317

318+
- Screenshots also serve as a practical page warmup mechanism for background tabs. They can unblock page paint and media decode work that passive DOM/readiness inspection does not reliably trigger on its own.
319+
315320
### Commands That Return Screenshots
316321

317322
| Command | Auto-Screenshot | Notes |

eval/bluebook/js/bluebook.js

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,12 @@ window.tracker = new AgentTracker('bluebook.life', 'hard');
612612
return notes;
613613
}
614614

615+
function stabilizeDefaultFeedOrder(notes) {
616+
keepNoteAwayFromTop(notes, 'note-openclaw-config', 18);
617+
keepNoteAwayFromTop(notes, 'note-arigato-ai', 24);
618+
return notes;
619+
}
620+
615621
function getCurrentNote() {
616622
return state.notes.find((note) => note.id === state.currentNoteId) || null;
617623
}
@@ -1059,7 +1065,7 @@ window.tracker = new AgentTracker('bluebook.life', 'hard');
10591065
state.notes[swapIndex] = temp;
10601066
}
10611067

1062-
keepNoteAwayFromTop(state.notes, 'note-openclaw-config', 18);
1068+
stabilizeDefaultFeedOrder(state.notes);
10631069
}
10641070

10651071
function handleFeedReload() {
@@ -1467,7 +1473,7 @@ window.tracker = new AgentTracker('bluebook.life', 'hard');
14671473

14681474
function initialize() {
14691475
state.notes = buildNotes();
1470-
keepNoteAwayFromTop(state.notes, 'note-openclaw-config', 18);
1476+
stabilizeDefaultFeedOrder(state.notes);
14711477
state.query = getSearchQueryFromUrl();
14721478

14731479
cacheDom();

eval/dataset/cloudstack.yaml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@ criteria:
2121
event_type: click
2222
element_id: "das-agent-toggle"
2323
page: "/cloudstack/das.html"
24-
alternative:
25-
event_type: click
26-
element_id: "open-chat-btn"
27-
page: "/cloudstack/das.html"
24+
alternatives:
25+
- event_type: click
26+
element_id: "open-chat-btn"
27+
page: "/cloudstack/das.html"
28+
- event_type: click
29+
element_text: "DAS Agent"
30+
parent_text_contains: "AI"
31+
page: "/cloudstack/das.html"
2832
- type: greet_das_agent
2933
description: "Send a greeting message to DAS agent"
3034
points: 1
@@ -40,4 +44,4 @@ criteria:
4044
event_type: click
4145
element_id: "send-btn"
4246
page: "/cloudstack/das.html"
43-
optional: true
47+
optional: true

eval/dataset/cloudstack_interactive.yaml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,14 @@ criteria:
2424
event_type: click
2525
element_id: "das-agent-toggle"
2626
page: "/cloudstack/das.html"
27-
alternative:
28-
event_type: click
29-
element_id: "open-chat-btn"
30-
page: "/cloudstack/das.html"
27+
alternatives:
28+
- event_type: click
29+
element_id: "open-chat-btn"
30+
page: "/cloudstack/das.html"
31+
- event_type: click
32+
element_text: "DAS Agent"
33+
parent_text_contains: "AI"
34+
page: "/cloudstack/das.html"
3135

3236
# New: Send initial greeting
3337
- type: greet_das_agent
@@ -114,4 +118,4 @@ criteria:
114118
event_type: count_min
115119
condition: "chat_interactions"
116120
count: 3
117-
page: "/cloudstack/das.html"
121+
page: "/cloudstack/das.html"

eval/evaluate_browser_agent.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -610,17 +610,15 @@ def start_openbrowser(self) -> bool:
610610
return True
611611

612612
root_dir = EVAL_DIR.parent
613-
logger.error(
614-
f"""
613+
logger.error(f"""
615614
❌ OpenBrowser server is not running!
616615
Please start the OpenBrowser server manually with:
617616
618617
cd {root_dir}
619618
uv run local-chrome-server serve
620619
621620
The server should start on port 8765 (REST API) and 8766 (WebSocket).
622-
"""
623-
)
621+
""")
624622
return False
625623

626624
except Exception as e:
@@ -637,8 +635,7 @@ def start_eval_server(self) -> bool:
637635

638636
eval_dir = EVAL_DIR
639637
root_dir = EVAL_DIR.parent
640-
logger.error(
641-
f"""
638+
logger.error(f"""
642639
❌ Eval server is not running!
643640
Please start the eval server manually with:
644641
@@ -650,8 +647,7 @@ def start_eval_server(self) -> bool:
650647
uv run python eval/server.py
651648
652649
The server should start on port 16605.
653-
"""
654-
)
650+
""")
655651
return False
656652

657653
except Exception as e:
@@ -1304,6 +1300,7 @@ def _evaluate_criteria(
13041300
expected = criterion.get("expected")
13051301
points = criterion.get("points", 1)
13061302
alternative = criterion.get("alternative")
1303+
alternatives = criterion.get("alternatives", [])
13071304
optional = criterion.get("optional", False)
13081305

13091306
# For optional criteria, we give the points automatically (treat as satisfied)
@@ -1314,9 +1311,15 @@ def _evaluate_criteria(
13141311
)
13151312
continue
13161313

1317-
if self._check_criterion(expected, track_events, sse_events) or (
1318-
alternative
1319-
and self._check_criterion(alternative, track_events, sse_events)
1314+
candidate_expectations = [expected]
1315+
if alternative:
1316+
candidate_expectations.append(alternative)
1317+
if alternatives:
1318+
candidate_expectations.extend(alternatives)
1319+
1320+
if any(
1321+
candidate and self._check_criterion(candidate, track_events, sse_events)
1322+
for candidate in candidate_expectations
13201323
):
13211324
score += points
13221325
logger.debug(

0 commit comments

Comments
 (0)