@@ -248,40 +248,61 @@ async function toolAnalyzeScreenshot(
248248 } ) ;
249249 }
250250
251- const stream = await openai . chat . completions . create ( {
252- model : VISION_MODEL ,
253- messages : [
254- { role : "system" , content : SCREENSHOT_SYSTEM_PROMPT } ,
255- {
256- role : "user" ,
257- content : [
258- {
259- type : "text" ,
260- text : `Bug description: ${ bugDescription . slice ( 0 , 500 ) } \n\nAnalyze this screenshot:` ,
261- } ,
251+ const SCREENSHOT_MAX_RETRIES = 3 ;
252+ const SCREENSHOT_BASE_DELAY_MS = 1000 ;
253+
254+ for ( let attempt = 1 ; attempt <= SCREENSHOT_MAX_RETRIES ; attempt ++ ) {
255+ try {
256+ const stream = await openai . chat . completions . create ( {
257+ model : VISION_MODEL ,
258+ messages : [
259+ { role : "system" , content : SCREENSHOT_SYSTEM_PROMPT } ,
262260 {
263- type : "image_url" ,
264- image_url : { url : base64Url , detail : "high" } ,
261+ role : "user" ,
262+ content : [
263+ {
264+ type : "text" ,
265+ text : `Bug description: ${ bugDescription . slice ( 0 , 500 ) } \n\nAnalyze this screenshot:` ,
266+ } ,
267+ {
268+ type : "image_url" ,
269+ image_url : { url : base64Url , detail : "high" } ,
270+ } ,
271+ ] ,
265272 } ,
266273 ] ,
267- } ,
268- ] ,
269- max_tokens : 300 ,
270- temperature : 0 ,
271- stream : true ,
272- } ) ;
274+ max_tokens : 300 ,
275+ temperature : 0 ,
276+ stream : true ,
277+ } ) ;
273278
274- const response = await collectStream ( stream ) ;
275- const content = response . message . content ?? "" ;
276- const jsonMatch = content . match ( / \{ [ \s \S ] * \} / ) ;
277- if ( jsonMatch ) {
278- return jsonMatch [ 0 ] ;
279+ const response = await collectStream ( stream ) ;
280+ const content = response . message . content ?? "" ;
281+ const jsonMatch = content . match ( / \{ [ \s \S ] * \} / ) ;
282+ if ( jsonMatch ) {
283+ return jsonMatch [ 0 ] ;
284+ }
285+ logger . warn ( { url, attempt } , "Code-verify: could not parse vision response — retrying" ) ;
286+ if ( attempt < SCREENSHOT_MAX_RETRIES ) {
287+ await new Promise ( ( r ) => setTimeout ( r , SCREENSHOT_BASE_DELAY_MS * attempt ) ) ;
288+ continue ;
289+ }
290+ return JSON . stringify ( { valid : true , reasoning : "Could not parse vision response after retries" , shows : "unknown" } ) ;
291+ } catch ( err ) {
292+ const msg = err instanceof Error ? err . message : String ( err ) ;
293+ logger . warn ( { err : msg , url, attempt } , "Code-verify: screenshot analysis failed" ) ;
294+ if ( attempt < SCREENSHOT_MAX_RETRIES ) {
295+ await new Promise ( ( r ) => setTimeout ( r , SCREENSHOT_BASE_DELAY_MS * attempt ) ) ;
296+ continue ;
297+ }
298+ return JSON . stringify ( { valid : true , reasoning : `Vision analysis failed after retries: ${ msg . slice ( 0 , 100 ) } ` , shows : "unknown" } ) ;
299+ }
279300 }
280- return JSON . stringify ( { valid : true , reasoning : "Could not parse vision response " , shows : "unknown" } ) ;
301+ return JSON . stringify ( { valid : true , reasoning : "Screenshot analysis exhausted retries " , shows : "unknown" } ) ;
281302 } catch ( err ) {
282303 const msg = err instanceof Error ? err . message : String ( err ) ;
283- logger . warn ( { err : msg , url } , "Code-verify: screenshot analysis failed" ) ;
284- return JSON . stringify ( { valid : true , reasoning : `Vision analysis failed: ${ msg . slice ( 0 , 100 ) } ` , shows : "unknown " } ) ;
304+ logger . warn ( { err : msg , url } , "Code-verify: screenshot download/processing failed" ) ;
305+ return JSON . stringify ( { valid : false , reasoning : `Screenshot processing failed: ${ msg . slice ( 0 , 100 ) } ` , shows : "error " } ) ;
285306 }
286307}
287308
@@ -473,22 +494,42 @@ Verify this bug: explore the code AND analyze all screenshots. Then call deliver
473494 { role : "user" , content : userMessage } ,
474495 ] ;
475496
497+ const LLM_MAX_RETRIES = 3 ;
498+ const LLM_BASE_DELAY_MS = 1000 ;
499+
476500 for ( let i = 0 ; i < CODE_VERIFY_MAX_ITERATIONS ; i ++ ) {
477- let assembled : Awaited < ReturnType < typeof collectStream > > ;
478- try {
479- const stream = await openai . chat . completions . create ( {
480- model : LLM_SCORING_MODEL ,
481- messages,
482- tools,
483- tool_choice : "auto" ,
484- temperature : 0 ,
485- max_tokens : 2000 ,
486- stream : true ,
487- } ) ;
488- assembled = await collectStream ( stream ) ;
489- } catch ( err ) {
490- logger . error ( { err, issueNumber, iteration : i } , "Code-verify: LLM call failed" ) ;
491- return { plausible : false , confidence : 0.8 , reasoning : "Code verification LLM call failed. Cannot confirm bug." } ;
501+ let assembled ! : Awaited < ReturnType < typeof collectStream > > ;
502+
503+ let llmSuccess = false ;
504+ for ( let attempt = 1 ; attempt <= LLM_MAX_RETRIES ; attempt ++ ) {
505+ try {
506+ const stream = await openai . chat . completions . create ( {
507+ model : LLM_SCORING_MODEL ,
508+ messages,
509+ tools,
510+ tool_choice : "auto" ,
511+ temperature : 0 ,
512+ max_tokens : 2000 ,
513+ stream : true ,
514+ } ) ;
515+ assembled = await collectStream ( stream ) ;
516+ llmSuccess = true ;
517+ break ;
518+ } catch ( err ) {
519+ const msg = err instanceof Error ? err . message : String ( err ) ;
520+ logger . warn (
521+ { err : msg , issueNumber, iteration : i , attempt, maxRetries : LLM_MAX_RETRIES } ,
522+ `Code-verify: LLM call failed (attempt ${ attempt } /${ LLM_MAX_RETRIES } )` ,
523+ ) ;
524+ if ( attempt < LLM_MAX_RETRIES ) {
525+ await new Promise ( ( r ) => setTimeout ( r , LLM_BASE_DELAY_MS * attempt ) ) ;
526+ }
527+ }
528+ }
529+
530+ if ( ! llmSuccess ) {
531+ logger . error ( { issueNumber, iteration : i } , "Code-verify: LLM call failed after all retries" ) ;
532+ throw new Error ( `Code verification LLM call failed after ${ LLM_MAX_RETRIES } retries for issue #${ issueNumber } ` ) ;
492533 }
493534
494535 const msg = assembled . message ;
@@ -566,21 +607,48 @@ Verify this bug: explore the code AND analyze all screenshots. Then call deliver
566607 continue ;
567608 }
568609
569- if ( ! msg . content ) {
570- messages . push ( {
571- role : "user" ,
572- content : "Continue investigating. Call deliver_code_verdict when ready." ,
573- } ) ;
574- continue ;
575- }
610+ // Agent made no tool call this turn (its reply may or may not contain text) — nudge it to deliver the verdict
611+ messages . push ( {
612+ role : "user" ,
613+ content :
614+ "You MUST now call deliver_code_verdict with your findings. " +
615+ "Do not explain further — call the tool immediately." ,
616+ } ) ;
617+ }
618+
619+ logger . warn ( { issueNumber } , "Code-verify: agent did not deliver verdict — retrying with forced tool_choice" ) ;
576620
577- break ;
621+ // Final forced attempt: explicitly require deliver_code_verdict
622+ try {
623+ const stream = await openai . chat . completions . create ( {
624+ model : LLM_SCORING_MODEL ,
625+ messages,
626+ tools,
627+ tool_choice : { type : "function" , function : { name : "deliver_code_verdict" } } ,
628+ temperature : 0 ,
629+ max_tokens : 1500 ,
630+ stream : true ,
631+ } ) ;
632+ const forced = await collectStream ( stream ) ;
633+ const tc = forced . message . tool_calls ?. [ 0 ] ;
634+ if ( tc && tc . function . name === "deliver_code_verdict" ) {
635+ let fnArgs : Record < string , unknown > ;
636+ try { fnArgs = JSON . parse ( tc . function . arguments ) ; } catch { fnArgs = { } ; }
637+ const result : CodeVerifyResult = {
638+ plausible : ( fnArgs . plausible as boolean ) ?? false ,
639+ confidence : ( fnArgs . confidence as number ) ?? 0.5 ,
640+ reasoning : ( fnArgs . reasoning as string ) ?? "No reasoning (forced verdict)." ,
641+ codeEvidence : fnArgs . code_evidence as string | undefined ,
642+ screenshotValid : fnArgs . screenshot_valid as boolean | undefined ,
643+ screenshotReasoning : fnArgs . screenshot_reasoning as string | undefined ,
644+ } ;
645+ logger . info ( { issueNumber, plausible : result . plausible , confidence : result . confidence } , "Code-verify: forced verdict delivered" ) ;
646+ return result ;
647+ }
648+ } catch ( err ) {
649+ const errMsg = err instanceof Error ? err . message : String ( err ) ;
650+ logger . error ( { err : errMsg , issueNumber } , "Code-verify: forced verdict call failed" ) ;
578651 }
579652
580- logger . warn ( { issueNumber } , "Code-verify: agent did not deliver verdict" ) ;
581- return {
582- plausible : false ,
583- confidence : 0.8 ,
584- reasoning : "Verification agent exhausted iterations without delivering a verdict. Bug unverified." ,
585- } ;
653+ throw new Error ( `Code verification agent failed to deliver verdict for issue #${ issueNumber } after ${ CODE_VERIFY_MAX_ITERATIONS } iterations` ) ;
586654}
0 commit comments