Skip to content

Commit 1c25e82

Browse files
committed
feat: enhance agentic test script with extended timeouts and detailed logging
Updated test_agentic_mcp.py with improved debugging and inspection capabilities:

Timeout Changes:
- Increased all test timeouts from 60/90s to 300s (5 minutes)
- Allows agentic workflows to complete multi-step reasoning without interruption

File Logging:
- Created test_output/ directory for detailed test logs
- Each test saves input query and output to a timestamped log file
- Log file naming: {test_number:02d}_{tool_name}_{timestamp}.log
- Logs include: test metadata, input query, full output, duration, status

Enhanced Output:
- Shows log filename after each test completes
- Summary displays test_output directory location
- Added test output directory to configuration display
- Preserves full error traces and timeouts in log files

Benefits:
- Easier debugging of agentic workflow issues
- Can inspect full responses without console scrolling
- Timestamped logs for comparing runs
- Complete error context preserved for analysis

Files changed:
- test_agentic_mcp.py: Added file logging and increased timeouts
- .gitignore: Added /test_output/ to ignore test logs
1 parent 4872048 commit 1c25e82

File tree

2 files changed

+136
-9
lines changed

2 files changed

+136
-9
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,4 @@ OUROBOROS.md
179179
SESSION-MEMORY.md
180180
.serena/
181181
.codegraph.toml
182+
/test_output/

test_agentic_mcp.py

Lines changed: 135 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,33 +83,37 @@
8383
AGENTIC_TESTS = [
8484
("agentic_code_search",
8585
"How is configuration loaded in this codebase? Find all config loading mechanisms.",
86-
60),
86+
300),
8787

8888
("agentic_dependency_analysis",
8989
"Analyze the dependency chain for the AgenticOrchestrator. What does it depend on?",
90-
60),
90+
300),
9191

9292
("agentic_call_chain_analysis",
9393
"Trace the call chain from execute_agentic_workflow to the graph analysis tools",
94-
60),
94+
300),
9595

9696
("agentic_architecture_analysis",
9797
"Analyze the architecture of the MCP server. Find coupling metrics and hub nodes.",
98-
90),
98+
300),
9999

100100
("agentic_api_surface_analysis",
101101
"What is the public API surface of the GraphToolExecutor?",
102-
60),
102+
300),
103103

104104
("agentic_context_builder",
105105
"Gather comprehensive context about the tier-aware prompt selection system",
106-
90),
106+
300),
107107

108108
("agentic_semantic_question",
109109
"How does the LRU cache work in GraphToolExecutor? What gets cached and when?",
110-
60),
110+
300),
111111
]
112112

113+
# Create output directory for test logs
114+
TEST_OUTPUT_DIR = Path(__file__).resolve().parent / "test_output"
115+
TEST_OUTPUT_DIR.mkdir(exist_ok=True)
116+
113117

114118
def print_config():
115119
"""Print resolved configuration."""
@@ -146,6 +150,8 @@ def print_config():
146150
db = os.environ.get("CODEGRAPH_SURREALDB_DATABASE", "codegraph")
147151
print(f"\n SurrealDB: {url}")
148152
print(f" Namespace/DB: {ns}/{db}")
153+
154+
print(f"\n 📁 Test Output Directory: {TEST_OUTPUT_DIR}")
149155
print("=" * 72)
150156

151157

@@ -222,8 +228,24 @@ async def run_stdio_tests():
222228
print(f" Query: {query[:60]}...")
223229
print(f" Timeout: {timeout}s")
224230

231+
# Create log file for this test
232+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
233+
log_file = TEST_OUTPUT_DIR / f"{idx:02d}_{tool_name}_{timestamp}.log"
234+
225235
start_time = asyncio.get_event_loop().time()
226236

237+
# Write input to log file
238+
with open(log_file, "w", encoding="utf-8") as f:
239+
f.write("=" * 80 + "\n")
240+
f.write(f"Test: {tool_name}\n")
241+
f.write(f"Timestamp: {timestamp}\n")
242+
f.write(f"Timeout: {timeout}s\n")
243+
f.write("=" * 80 + "\n\n")
244+
f.write("INPUT QUERY:\n")
245+
f.write("-" * 80 + "\n")
246+
f.write(query + "\n")
247+
f.write("-" * 80 + "\n\n")
248+
227249
try:
228250
# Call tool with timeout
229251
result = await asyncio.wait_for(
@@ -236,12 +258,23 @@ async def run_stdio_tests():
236258
# Parse result
237259
if result.content and len(result.content) > 0:
238260
text_content = result.content[0].text
261+
262+
# Write output to log file
263+
with open(log_file, "a", encoding="utf-8") as f:
264+
f.write("OUTPUT:\n")
265+
f.write("-" * 80 + "\n")
266+
f.write(text_content + "\n")
267+
f.write("-" * 80 + "\n\n")
268+
f.write(f"Duration: {duration:.1f}s\n")
269+
f.write(f"Status: SUCCESS\n")
270+
239271
try:
240272
data = json.loads(text_content)
241273
steps = data.get("total_steps", 0)
242274
final_answer = data.get("final_answer", "")
243275

244276
print(f" ✅ SUCCESS in {duration:.1f}s ({steps} steps)")
277+
print(f" 📝 Log saved: {log_file.name}")
245278
if final_answer:
246279
preview = final_answer[:100].replace('\n', ' ')
247280
print(f" {preview}...")
@@ -254,14 +287,24 @@ async def run_stdio_tests():
254287
})
255288
except json.JSONDecodeError:
256289
print(f" ✅ SUCCESS in {duration:.1f}s (non-JSON response)")
290+
print(f" 📝 Log saved: {log_file.name}")
257291
results.append({
258292
"test": tool_name,
259293
"success": True,
260294
"duration": duration,
261295
"steps": 0
262296
})
263297
else:
298+
with open(log_file, "a", encoding="utf-8") as f:
299+
f.write("OUTPUT:\n")
300+
f.write("-" * 80 + "\n")
301+
f.write("(Empty result)\n")
302+
f.write("-" * 80 + "\n\n")
303+
f.write(f"Duration: {duration:.1f}s\n")
304+
f.write(f"Status: FAILED (empty result)\n")
305+
264306
print(f" ❌ FAILED: Empty result")
307+
print(f" 📝 Log saved: {log_file.name}")
265308
results.append({
266309
"test": tool_name,
267310
"success": False,
@@ -271,7 +314,16 @@ async def run_stdio_tests():
271314

272315
except asyncio.TimeoutError:
273316
duration = timeout
317+
with open(log_file, "a", encoding="utf-8") as f:
318+
f.write("OUTPUT:\n")
319+
f.write("-" * 80 + "\n")
320+
f.write(f"TIMEOUT after {timeout}s\n")
321+
f.write("-" * 80 + "\n\n")
322+
f.write(f"Duration: {duration:.1f}s\n")
323+
f.write(f"Status: TIMEOUT\n")
324+
274325
print(f" ❌ TIMEOUT after {timeout}s")
326+
print(f" 📝 Log saved: {log_file.name}")
275327
results.append({
276328
"test": tool_name,
277329
"success": False,
@@ -281,9 +333,22 @@ async def run_stdio_tests():
281333

282334
except Exception as e:
283335
duration = asyncio.get_event_loop().time() - start_time
336+
import traceback
337+
error_trace = traceback.format_exc()
338+
339+
with open(log_file, "a", encoding="utf-8") as f:
340+
f.write("OUTPUT:\n")
341+
f.write("-" * 80 + "\n")
342+
f.write(f"ERROR: {e}\n\n")
343+
f.write("Full traceback:\n")
344+
f.write(error_trace)
345+
f.write("-" * 80 + "\n\n")
346+
f.write(f"Duration: {duration:.1f}s\n")
347+
f.write(f"Status: ERROR\n")
348+
284349
print(f" ❌ ERROR: {e}")
350+
print(f" 📝 Log saved: {log_file.name}")
285351
print(f"\n 📋 Full error details:")
286-
import traceback
287352
traceback.print_exc()
288353
results.append({
289354
"test": tool_name,
@@ -312,6 +377,7 @@ async def run_stdio_tests():
312377
print()
313378

314379
print(f"\nTotal: {passed}/{total} passed")
380+
print(f"\n📁 Detailed logs saved to: {TEST_OUTPUT_DIR}")
315381
print("=" * 72)
316382

317383
return 0 if passed == total else 1
@@ -356,8 +422,24 @@ async def run_http_tests():
356422
print(f" Query: {query[:60]}...")
357423
print(f" Timeout: {timeout}s")
358424

425+
# Create log file for this test
426+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
427+
log_file = TEST_OUTPUT_DIR / f"{idx:02d}_{tool_name}_{timestamp}.log"
428+
359429
start_time = asyncio.get_event_loop().time()
360430

431+
# Write input to log file
432+
with open(log_file, "w", encoding="utf-8") as f:
433+
f.write("=" * 80 + "\n")
434+
f.write(f"Test: {tool_name}\n")
435+
f.write(f"Timestamp: {timestamp}\n")
436+
f.write(f"Timeout: {timeout}s\n")
437+
f.write("=" * 80 + "\n\n")
438+
f.write("INPUT QUERY:\n")
439+
f.write("-" * 80 + "\n")
440+
f.write(query + "\n")
441+
f.write("-" * 80 + "\n\n")
442+
361443
try:
362444
# Call tool with timeout
363445
result = await asyncio.wait_for(
@@ -370,12 +452,23 @@ async def run_http_tests():
370452
# Parse result
371453
if result.content and len(result.content) > 0:
372454
text_content = result.content[0].text
455+
456+
# Write output to log file
457+
with open(log_file, "a", encoding="utf-8") as f:
458+
f.write("OUTPUT:\n")
459+
f.write("-" * 80 + "\n")
460+
f.write(text_content + "\n")
461+
f.write("-" * 80 + "\n\n")
462+
f.write(f"Duration: {duration:.1f}s\n")
463+
f.write(f"Status: SUCCESS\n")
464+
373465
try:
374466
data = json.loads(text_content)
375467
steps = data.get("total_steps", 0)
376468
final_answer = data.get("final_answer", "")
377469

378470
print(f" ✅ SUCCESS in {duration:.1f}s ({steps} steps)")
471+
print(f" 📝 Log saved: {log_file.name}")
379472
if final_answer:
380473
preview = final_answer[:100].replace('\n', ' ')
381474
print(f" {preview}...")
@@ -388,14 +481,24 @@ async def run_http_tests():
388481
})
389482
except json.JSONDecodeError:
390483
print(f" ✅ SUCCESS in {duration:.1f}s (non-JSON response)")
484+
print(f" 📝 Log saved: {log_file.name}")
391485
results.append({
392486
"test": tool_name,
393487
"success": True,
394488
"duration": duration,
395489
"steps": 0
396490
})
397491
else:
492+
with open(log_file, "a", encoding="utf-8") as f:
493+
f.write("OUTPUT:\n")
494+
f.write("-" * 80 + "\n")
495+
f.write("(Empty result)\n")
496+
f.write("-" * 80 + "\n\n")
497+
f.write(f"Duration: {duration:.1f}s\n")
498+
f.write(f"Status: FAILED (empty result)\n")
499+
398500
print(f" ❌ FAILED: Empty result")
501+
print(f" 📝 Log saved: {log_file.name}")
399502
results.append({
400503
"test": tool_name,
401504
"success": False,
@@ -405,7 +508,16 @@ async def run_http_tests():
405508

406509
except asyncio.TimeoutError:
407510
duration = timeout
511+
with open(log_file, "a", encoding="utf-8") as f:
512+
f.write("OUTPUT:\n")
513+
f.write("-" * 80 + "\n")
514+
f.write(f"TIMEOUT after {timeout}s\n")
515+
f.write("-" * 80 + "\n\n")
516+
f.write(f"Duration: {duration:.1f}s\n")
517+
f.write(f"Status: TIMEOUT\n")
518+
408519
print(f" ❌ TIMEOUT after {timeout}s")
520+
print(f" 📝 Log saved: {log_file.name}")
409521
results.append({
410522
"test": tool_name,
411523
"success": False,
@@ -415,9 +527,22 @@ async def run_http_tests():
415527

416528
except Exception as e:
417529
duration = asyncio.get_event_loop().time() - start_time
530+
import traceback
531+
error_trace = traceback.format_exc()
532+
533+
with open(log_file, "a", encoding="utf-8") as f:
534+
f.write("OUTPUT:\n")
535+
f.write("-" * 80 + "\n")
536+
f.write(f"ERROR: {e}\n\n")
537+
f.write("Full traceback:\n")
538+
f.write(error_trace)
539+
f.write("-" * 80 + "\n\n")
540+
f.write(f"Duration: {duration:.1f}s\n")
541+
f.write(f"Status: ERROR\n")
542+
418543
print(f" ❌ ERROR: {e}")
544+
print(f" 📝 Log saved: {log_file.name}")
419545
print(f"\n 📋 Full error details:")
420-
import traceback
421546
traceback.print_exc()
422547
results.append({
423548
"test": tool_name,
@@ -456,6 +581,7 @@ async def run_http_tests():
456581
print()
457582

458583
print(f"\nTotal: {passed}/{total} passed")
584+
print(f"\n📁 Detailed logs saved to: {TEST_OUTPUT_DIR}")
459585
print("=" * 72)
460586

461587
return 0 if passed == total else 1

0 commit comments

Comments
 (0)