@@ -21,7 +21,7 @@ import {
2121} from '../shared-interfaces.js' ;
2222import { UserFacingError } from '../utils/errors.js' ;
2323import { executeCommand } from '../utils/exec.js' ;
24- import { callWithTimeout } from '../utils/timeout.js' ;
24+ import { callWithTimeout , TimeoutError } from '../utils/timeout.js' ;
2525import { LocalExecutor } from './executors/local-executor.js' ;
2626import { startEvaluationTask } from './generate-eval-task.js' ;
2727import { prepareSummary } from './generate-summary.js' ;
@@ -145,55 +145,74 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
for (const rootPromptDef of promptsToProcess) {
  allTasks.push(
    appConcurrencyQueue.add(async () => {
      /**
       * Runs a single evaluation attempt for `rootPromptDef`.
       *
       * Initializes a fresh eval on the executor, runs the evaluation task under a
       * timeout, and always finalizes the eval afterwards. Errors from the
       * evaluation itself (including timeouts) propagate to the caller so that the
       * retry loop below can decide what to do with them.
       */
      const evaluate = async () => {
        const evalID = await env.executor.initializeEval();
        let results: AssessmentResult[] | undefined;

        try {
          results = await callWithTimeout(
            `Evaluation of ${rootPromptDef.name}`,
            async timeoutAbortSignal =>
              startEvaluationTask(
                options,
                evalID,
                env,
                autoraterLlm,
                cujGenerationLlm,
                rootPromptDef,
                // The task is aborted when any one of these signals fires.
                combineAbortSignals(
                  allTasksAbortCtrl.signal,
                  timeoutAbortSignal,
                  options.abortSignal,
                ),
                workerConcurrencyQueue,
                progress,
              ),
            // A timeout is used to prevent stuck evaluations.
            env.promptTimeoutMinutes ?? 10,
          );
          return results;
        } finally {
          // Gracefully finalize the eval. Errors in finalization should not propagate.
          try {
            await env.executor.finalizeEval(evalID);
          } catch (e) {
            progress.log(rootPromptDef, 'error', 'Failed to finalize eval', `${e}`);
          }
          // NOTE(review): this runs for every attempt, including attempts that time
          // out and are retried afterwards — confirm that reporting "finished" with
          // empty results before a retry is intended.
          progress.evalFinished(rootPromptDef, results || []);
        }
      };

      // Retries + initial attempt.
      const maxAttempts = (options.promptTimeoutRetries ?? 0) + 1;
      for (let attemptIdx = 0; attemptIdx < maxAttempts; attemptIdx++) {
        try {
          return await evaluate();
        } catch (e: unknown) {
          // Only retry on timeouts, and only while attempts remain. The comparison
          // must be against the index of the last attempt: comparing against
          // `maxAttempts` is always true inside this loop, which would skip the
          // failure reporting below and fall through to the "unexpected" throw
          // when the final attempt times out.
          const isLastAttempt = attemptIdx >= maxAttempts - 1;
          if (e instanceof TimeoutError && !isLastAttempt) {
            continue;
          }

          failedPrompts.push({
            promptName: rootPromptDef.name,
            error: `${e}`,
            stack: e instanceof Error ? e.stack : undefined,
          });

          let details = `Error: ${e}`;
          if (e instanceof Error && e.stack) {
            details += `\nStack: ${e.stack}`;
          }

          progress.log(rootPromptDef, 'error', 'Failed to evaluate code', details);
          return [] satisfies AssessmentResult[];
        }
      }

      // Every loop iteration either returns or continues to a later attempt, so
      // this is only reachable if `maxAttempts` is not a positive number.
      throw new Error(
        `Unexpected code path. ` +
          `There were ${maxAttempts} attempts for evaluating: ${rootPromptDef.name}`,
      );
    }),
  );
}
0 commit comments