Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
415 changes: 407 additions & 8 deletions eval/evaluate_browser_agent.py

Large diffs are not rendered by default.

595 changes: 370 additions & 225 deletions eval/evaluation_report.json

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions extension/src/__tests__/background-cleanup-regression.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ describe('Background cleanup regressions', () => {
);
});

// Regression guard: asserts that backgroundSource still contains both the
// final-attempt full-scan option expression and the attempt-count constant.
test('final highlight attempt full-scans even when readiness is still not_ready', () => {
  const requiredSnippets = [
    'fullPageScanOnNotReady: attempt === maxHighlightAttempts',
    'const maxHighlightAttempts = 3;',
  ];

  // Checked in order so the first missing snippet is the one reported.
  for (const snippet of requiredSnippets) {
    expect(backgroundSource).toContain(snippet);
  }
});

test('navigation defaults prime the page with a raw screenshot before highlight', () => {
expect(backgroundSource).toContain('async function runRawScreenshotPrime(');
expect(backgroundSource).toContain('primeWithRawScreenshot: true');
Expand Down
61 changes: 61 additions & 0 deletions extension/src/__tests__/highlight-detection.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,26 @@ describe('highlight-detection helpers', () => {
expect(script).toContain('evaluateLayoutReadiness');
});

// Builds a script with fullPageScanOnNotReady enabled and asserts that the
// option value, the config check, and the override marker all appear in it.
test('buildHighlightDetectionScript can force full scans on not-ready pages', () => {
  const generated = buildHighlightDetectionScript({
    elementType: 'any',
    fullPageScanOnNotReady: true,
  });

  const expectedFragments = [
    '"fullPageScanOnNotReady":true',
    'config.fullPageScanOnNotReady === true',
    'override=full_scan',
  ];

  // Checked in order so the first missing fragment is the one reported.
  for (const fragment of expectedFragments) {
    expect(generated).toContain(fragment);
  }
});

// Asserts that the generated script contains the readiness-snapshot helpers,
// the not-ready scan cap, and the related trace markers, and that it no
// longer contains a waitForLayoutStability function.
test('buildHighlightDetectionScript uses readiness snapshot instead of wait loop', () => {
  const generated = buildHighlightDetectionScript({ elementType: 'any' });

  const mustBePresent = [
    'function evaluateReadinessSnapshot',
    'function getCandidateElementsForScan',
    "layoutStability.state !== 'not_ready'",
    'config.fullPageScanOnNotReady === true',
    'const NOT_READY_SCAN_LIMIT = 500;',
    "trace('scan:capped'",
    'readiness:snapshot',
  ];

  // Presence checks run first, in order, followed by the single absence check.
  for (const fragment of mustBePresent) {
    expect(generated).toContain(fragment);
  }
  expect(generated).not.toContain('function waitForLayoutStability');
});
Expand All @@ -143,6 +159,51 @@ describe('highlight-detection helpers', () => {
);
});

// Asserts that the generated script defines the visually-hidden helpers and
// that the text of isElementVisibleForDetection (sliced out between two
// function markers) calls isVisuallyHiddenForDetection(el).
test('buildHighlightDetectionScript excludes visually hidden screen-reader nodes', () => {
  const script = buildHighlightDetectionScript({ elementType: 'any' });
  const start = script.indexOf('function isElementVisibleForDetection');
  const end = script.indexOf(
    'function isElementInViewportForDetection',
    start,
  );

  // indexOf returns -1 when a marker is missing. slice(start, -1) would then
  // cover almost the rest of the script and let the scoped assertion below
  // pass even if the call lives in some other function, so fail fast instead.
  expect(start).toBeGreaterThanOrEqual(0);
  expect(end).toBeGreaterThan(start);

  const visibilitySource = script.slice(start, end);

  expect(script).toContain('const VISUALLY_HIDDEN_TOKEN_REGEX');
  expect(script).toContain('sr-only');
  expect(script).toContain('screen-reader');
  expect(script).toContain('visually-hidden');
  expect(script).toContain('function isVisuallyHiddenForDetection');
  expect(visibilitySource).toContain('isVisuallyHiddenForDetection(el)');
});

// Slices the text of isLikelyTextTruncationContainer and isScrollableCandidate
// out of the generated script (each bounded by the next function marker) and
// asserts that each slice contains the expected checks.
test('buildHighlightDetectionScript excludes truncation-only scrollable false positives', () => {
  const script = buildHighlightDetectionScript({ elementType: 'scrollable' });
  const truncationStart = script.indexOf(
    'function isLikelyTextTruncationContainer',
  );
  const truncationEnd = script.indexOf(
    'function isScrollableCandidate',
    truncationStart,
  );
  const scrollableStart = script.indexOf('function isScrollableCandidate');
  const scrollableEnd = script.indexOf(
    'function isHoverableCandidate',
    scrollableStart,
  );

  // indexOf returns -1 when a marker is missing. A -1 end index makes slice()
  // span almost the rest of the script, so the scoped assertions below could
  // pass against the wrong function. Require every marker to be found in order.
  expect(truncationStart).toBeGreaterThanOrEqual(0);
  expect(truncationEnd).toBeGreaterThan(truncationStart);
  expect(scrollableStart).toBeGreaterThanOrEqual(0);
  expect(scrollableEnd).toBeGreaterThan(scrollableStart);

  const truncationSource = script.slice(truncationStart, truncationEnd);
  const scrollableSource = script.slice(scrollableStart, scrollableEnd);

  expect(truncationSource).toContain("style.textOverflow === 'ellipsis'");
  expect(truncationSource).toContain("style.whiteSpace === 'nowrap'");
  expect(truncationSource).toContain('countVisibleElementChildren(el, 2)');
  expect(scrollableSource).toContain('isLikelyTextTruncationContainer(el)');
  expect(scrollableSource).toContain('isSemanticControlElement(el)');
  expect(scrollableSource).toContain(
    "overflowX.includes('hidden') || overflowX.includes('clip')",
  );
  expect(scrollableSource).toContain('hasHorizontalSwipeLayout(el)');
});

test("buildHighlightDetectionScript keeps 'any' candidate selection across all element types", () => {
const script = buildHighlightDetectionScript({ elementType: 'any' });
const start = script.indexOf('function resolveElementCandidate');
Expand Down
8 changes: 4 additions & 4 deletions extension/src/background/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,6 @@ async function captureHighlightedPageState(
await tabManager.ensureTabManaged(tabId, conversationId);
tabManager.updateTabActivity(tabId, conversationId);

const detectionScript = buildHighlightDetectionScript({
elementType,
});

await runHighlightPreconditionWarmup({
tabId,
conversationId,
Expand All @@ -315,6 +311,10 @@ async function captureHighlightedPageState(

for (let attempt = 1; attempt <= maxHighlightAttempts; attempt++) {
console.log(`🔁 [${logLabel}] Attempt ${attempt}/${maxHighlightAttempts}`);
const detectionScript = buildHighlightDetectionScript({
elementType,
fullPageScanOnNotReady: attempt === maxHighlightAttempts,
});

const detectionResult = await javascript.executeJavaScript(
tabId,
Expand Down
Loading
Loading