# Table of agentic tool invocations exercised by the test runners.
# Each entry is (tool_name, query, timeout): the tool name is used to label
# the test and its log file, the query is the prompt text, and the timeout is
# printed and logged in seconds.
# All tools share a 300 s budget; earlier per-tool limits of 60 s / 90 s were
# raised uniformly to this value.
AGENTIC_TESTS = [
    ("agentic_code_search",
     "How is configuration loaded in this codebase? Find all config loading mechanisms.",
     300),

    ("agentic_dependency_analysis",
     "Analyze the dependency chain for the AgenticOrchestrator. What does it depend on?",
     300),

    ("agentic_call_chain_analysis",
     "Trace the call chain from execute_agentic_workflow to the graph analysis tools",
     300),

    ("agentic_architecture_analysis",
     "Analyze the architecture of the MCP server. Find coupling metrics and hub nodes.",
     300),

    ("agentic_api_surface_analysis",
     "What is the public API surface of the GraphToolExecutor?",
     300),

    ("agentic_context_builder",
     "Gather comprehensive context about the tier-aware prompt selection system",
     300),

    ("agentic_semantic_question",
     "How does the LRU cache work in GraphToolExecutor? What gets cached and when?",
     300),
]
112112
# Create output directory for test logs.
# The directory is a "test_output" folder placed next to this script.
# NOTE: this runs at import time, so merely importing the module creates the
# directory; exist_ok=True makes repeated runs a no-op.
TEST_OUTPUT_DIR = Path(__file__).resolve().parent / "test_output"
TEST_OUTPUT_DIR.mkdir(exist_ok=True)
116+
113117
114118def print_config ():
115119 """Print resolved configuration."""
@@ -146,6 +150,8 @@ def print_config():
146150 db = os .environ .get ("CODEGRAPH_SURREALDB_DATABASE" , "codegraph" )
147151 print (f"\n SurrealDB: { url } " )
148152 print (f" Namespace/DB: { ns } /{ db } " )
153+
154+ print (f"\n 📁 Test Output Directory: { TEST_OUTPUT_DIR } " )
149155 print ("=" * 72 )
150156
151157
@@ -222,8 +228,24 @@ async def run_stdio_tests():
222228 print (f" Query: { query [:60 ]} ..." )
223229 print (f" Timeout: { timeout } s" )
224230
231+ # Create log file for this test
232+ timestamp = datetime .now ().strftime ("%Y%m%d_%H%M%S" )
233+ log_file = TEST_OUTPUT_DIR / f"{ idx :02d} _{ tool_name } _{ timestamp } .log"
234+
225235 start_time = asyncio .get_event_loop ().time ()
226236
237+ # Write input to log file
238+ with open (log_file , "w" , encoding = "utf-8" ) as f :
239+ f .write ("=" * 80 + "\n " )
240+ f .write (f"Test: { tool_name } \n " )
241+ f .write (f"Timestamp: { timestamp } \n " )
242+ f .write (f"Timeout: { timeout } s\n " )
243+ f .write ("=" * 80 + "\n \n " )
244+ f .write ("INPUT QUERY:\n " )
245+ f .write ("-" * 80 + "\n " )
246+ f .write (query + "\n " )
247+ f .write ("-" * 80 + "\n \n " )
248+
227249 try :
228250 # Call tool with timeout
229251 result = await asyncio .wait_for (
@@ -236,12 +258,23 @@ async def run_stdio_tests():
236258 # Parse result
237259 if result .content and len (result .content ) > 0 :
238260 text_content = result .content [0 ].text
261+
262+ # Write output to log file
263+ with open (log_file , "a" , encoding = "utf-8" ) as f :
264+ f .write ("OUTPUT:\n " )
265+ f .write ("-" * 80 + "\n " )
266+ f .write (text_content + "\n " )
267+ f .write ("-" * 80 + "\n \n " )
268+ f .write (f"Duration: { duration :.1f} s\n " )
269+ f .write (f"Status: SUCCESS\n " )
270+
239271 try :
240272 data = json .loads (text_content )
241273 steps = data .get ("total_steps" , 0 )
242274 final_answer = data .get ("final_answer" , "" )
243275
244276 print (f" ✅ SUCCESS in { duration :.1f} s ({ steps } steps)" )
277+ print (f" 📝 Log saved: { log_file .name } " )
245278 if final_answer :
246279 preview = final_answer [:100 ].replace ('\n ' , ' ' )
247280 print (f" { preview } ..." )
@@ -254,14 +287,24 @@ async def run_stdio_tests():
254287 })
255288 except json .JSONDecodeError :
256289 print (f" ✅ SUCCESS in { duration :.1f} s (non-JSON response)" )
290+ print (f" 📝 Log saved: { log_file .name } " )
257291 results .append ({
258292 "test" : tool_name ,
259293 "success" : True ,
260294 "duration" : duration ,
261295 "steps" : 0
262296 })
263297 else :
298+ with open (log_file , "a" , encoding = "utf-8" ) as f :
299+ f .write ("OUTPUT:\n " )
300+ f .write ("-" * 80 + "\n " )
301+ f .write ("(Empty result)\n " )
302+ f .write ("-" * 80 + "\n \n " )
303+ f .write (f"Duration: { duration :.1f} s\n " )
304+ f .write (f"Status: FAILED (empty result)\n " )
305+
264306 print (f" ❌ FAILED: Empty result" )
307+ print (f" 📝 Log saved: { log_file .name } " )
265308 results .append ({
266309 "test" : tool_name ,
267310 "success" : False ,
@@ -271,7 +314,16 @@ async def run_stdio_tests():
271314
272315 except asyncio .TimeoutError :
273316 duration = timeout
317+ with open (log_file , "a" , encoding = "utf-8" ) as f :
318+ f .write ("OUTPUT:\n " )
319+ f .write ("-" * 80 + "\n " )
320+ f .write (f"TIMEOUT after { timeout } s\n " )
321+ f .write ("-" * 80 + "\n \n " )
322+ f .write (f"Duration: { duration :.1f} s\n " )
323+ f .write (f"Status: TIMEOUT\n " )
324+
274325 print (f" ❌ TIMEOUT after { timeout } s" )
326+ print (f" 📝 Log saved: { log_file .name } " )
275327 results .append ({
276328 "test" : tool_name ,
277329 "success" : False ,
@@ -281,9 +333,22 @@ async def run_stdio_tests():
281333
282334 except Exception as e :
283335 duration = asyncio .get_event_loop ().time () - start_time
336+ import traceback
337+ error_trace = traceback .format_exc ()
338+
339+ with open (log_file , "a" , encoding = "utf-8" ) as f :
340+ f .write ("OUTPUT:\n " )
341+ f .write ("-" * 80 + "\n " )
342+ f .write (f"ERROR: { e } \n \n " )
343+ f .write ("Full traceback:\n " )
344+ f .write (error_trace )
345+ f .write ("-" * 80 + "\n \n " )
346+ f .write (f"Duration: { duration :.1f} s\n " )
347+ f .write (f"Status: ERROR\n " )
348+
284349 print (f" ❌ ERROR: { e } " )
350+ print (f" 📝 Log saved: { log_file .name } " )
285351 print (f"\n 📋 Full error details:" )
286- import traceback
287352 traceback .print_exc ()
288353 results .append ({
289354 "test" : tool_name ,
@@ -312,6 +377,7 @@ async def run_stdio_tests():
312377 print ()
313378
314379 print (f"\n Total: { passed } /{ total } passed" )
380+ print (f"\n 📁 Detailed logs saved to: { TEST_OUTPUT_DIR } " )
315381 print ("=" * 72 )
316382
317383 return 0 if passed == total else 1
@@ -356,8 +422,24 @@ async def run_http_tests():
356422 print (f" Query: { query [:60 ]} ..." )
357423 print (f" Timeout: { timeout } s" )
358424
425+ # Create log file for this test
426+ timestamp = datetime .now ().strftime ("%Y%m%d_%H%M%S" )
427+ log_file = TEST_OUTPUT_DIR / f"{ idx :02d} _{ tool_name } _{ timestamp } .log"
428+
359429 start_time = asyncio .get_event_loop ().time ()
360430
431+ # Write input to log file
432+ with open (log_file , "w" , encoding = "utf-8" ) as f :
433+ f .write ("=" * 80 + "\n " )
434+ f .write (f"Test: { tool_name } \n " )
435+ f .write (f"Timestamp: { timestamp } \n " )
436+ f .write (f"Timeout: { timeout } s\n " )
437+ f .write ("=" * 80 + "\n \n " )
438+ f .write ("INPUT QUERY:\n " )
439+ f .write ("-" * 80 + "\n " )
440+ f .write (query + "\n " )
441+ f .write ("-" * 80 + "\n \n " )
442+
361443 try :
362444 # Call tool with timeout
363445 result = await asyncio .wait_for (
@@ -370,12 +452,23 @@ async def run_http_tests():
370452 # Parse result
371453 if result .content and len (result .content ) > 0 :
372454 text_content = result .content [0 ].text
455+
456+ # Write output to log file
457+ with open (log_file , "a" , encoding = "utf-8" ) as f :
458+ f .write ("OUTPUT:\n " )
459+ f .write ("-" * 80 + "\n " )
460+ f .write (text_content + "\n " )
461+ f .write ("-" * 80 + "\n \n " )
462+ f .write (f"Duration: { duration :.1f} s\n " )
463+ f .write (f"Status: SUCCESS\n " )
464+
373465 try :
374466 data = json .loads (text_content )
375467 steps = data .get ("total_steps" , 0 )
376468 final_answer = data .get ("final_answer" , "" )
377469
378470 print (f" ✅ SUCCESS in { duration :.1f} s ({ steps } steps)" )
471+ print (f" 📝 Log saved: { log_file .name } " )
379472 if final_answer :
380473 preview = final_answer [:100 ].replace ('\n ' , ' ' )
381474 print (f" { preview } ..." )
@@ -388,14 +481,24 @@ async def run_http_tests():
388481 })
389482 except json .JSONDecodeError :
390483 print (f" ✅ SUCCESS in { duration :.1f} s (non-JSON response)" )
484+ print (f" 📝 Log saved: { log_file .name } " )
391485 results .append ({
392486 "test" : tool_name ,
393487 "success" : True ,
394488 "duration" : duration ,
395489 "steps" : 0
396490 })
397491 else :
492+ with open (log_file , "a" , encoding = "utf-8" ) as f :
493+ f .write ("OUTPUT:\n " )
494+ f .write ("-" * 80 + "\n " )
495+ f .write ("(Empty result)\n " )
496+ f .write ("-" * 80 + "\n \n " )
497+ f .write (f"Duration: { duration :.1f} s\n " )
498+ f .write (f"Status: FAILED (empty result)\n " )
499+
398500 print (f" ❌ FAILED: Empty result" )
501+ print (f" 📝 Log saved: { log_file .name } " )
399502 results .append ({
400503 "test" : tool_name ,
401504 "success" : False ,
@@ -405,7 +508,16 @@ async def run_http_tests():
405508
406509 except asyncio .TimeoutError :
407510 duration = timeout
511+ with open (log_file , "a" , encoding = "utf-8" ) as f :
512+ f .write ("OUTPUT:\n " )
513+ f .write ("-" * 80 + "\n " )
514+ f .write (f"TIMEOUT after { timeout } s\n " )
515+ f .write ("-" * 80 + "\n \n " )
516+ f .write (f"Duration: { duration :.1f} s\n " )
517+ f .write (f"Status: TIMEOUT\n " )
518+
408519 print (f" ❌ TIMEOUT after { timeout } s" )
520+ print (f" 📝 Log saved: { log_file .name } " )
409521 results .append ({
410522 "test" : tool_name ,
411523 "success" : False ,
@@ -415,9 +527,22 @@ async def run_http_tests():
415527
416528 except Exception as e :
417529 duration = asyncio .get_event_loop ().time () - start_time
530+ import traceback
531+ error_trace = traceback .format_exc ()
532+
533+ with open (log_file , "a" , encoding = "utf-8" ) as f :
534+ f .write ("OUTPUT:\n " )
535+ f .write ("-" * 80 + "\n " )
536+ f .write (f"ERROR: { e } \n \n " )
537+ f .write ("Full traceback:\n " )
538+ f .write (error_trace )
539+ f .write ("-" * 80 + "\n \n " )
540+ f .write (f"Duration: { duration :.1f} s\n " )
541+ f .write (f"Status: ERROR\n " )
542+
418543 print (f" ❌ ERROR: { e } " )
544+ print (f" 📝 Log saved: { log_file .name } " )
419545 print (f"\n 📋 Full error details:" )
420- import traceback
421546 traceback .print_exc ()
422547 results .append ({
423548 "test" : tool_name ,
@@ -456,6 +581,7 @@ async def run_http_tests():
456581 print ()
457582
458583 print (f"\n Total: { passed } /{ total } passed" )
584+ print (f"\n 📁 Detailed logs saved to: { TEST_OUTPUT_DIR } " )
459585 print ("=" * 72 )
460586
461587 return 0 if passed == total else 1
0 commit comments