Skip to content

Commit f06813c

Browse files
committed
Fix unit tests for agent-runtime!
1 parent 72c617a commit f06813c

File tree

9 files changed

+286
-545
lines changed

9 files changed

+286
-545
lines changed

packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import {
55
clearMockedModules,
66
mockModule,
77
} from '@codebuff/common/testing/mock-modules'
8-
import { getToolCallString } from '@codebuff/common/tools/utils'
98
import { getInitialSessionState } from '@codebuff/common/types/session-state'
109
import { assistantMessage, userMessage } from '@codebuff/common/util/messages'
1110
import db from '@codebuff/internal/db'
@@ -25,7 +24,7 @@ import { z } from 'zod/v4'
2524
import { disableLiveUserInputCheck } from '../live-user-inputs'
2625
import { loopAgentSteps } from '../run-agent-step'
2726
import { clearAgentGeneratorCache } from '../run-programmatic-step'
28-
import { mockFileContext } from './test-utils'
27+
import { createToolCallChunk, mockFileContext } from './test-utils'
2928

3029
import type { AgentTemplate } from '../templates/types'
3130
import type { StepGenerator } from '@codebuff/common/types/agent-template'
@@ -81,10 +80,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
8180

8281
agentRuntimeImpl.promptAiSdkStream = async function* ({}) {
8382
llmCallCount++
84-
yield {
85-
type: 'text' as const,
86-
text: `LLM response\n\n${getToolCallString('end_turn', {})}`,
87-
}
83+
yield { type: 'text' as const, text: 'LLM response\n\n' }
84+
yield createToolCallChunk('end_turn', {})
8885
return 'mock-message-id'
8986
}
9087

@@ -508,10 +505,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
508505
llmStepCount++
509506

510507
// LLM always tries to end turn
511-
yield {
512-
type: 'text' as const,
513-
text: `LLM response\n\n${getToolCallString('end_turn', {})}`,
514-
}
508+
yield { type: 'text' as const, text: 'LLM response\n\n' }
509+
yield createToolCallChunk('end_turn', {})
515510
return `mock-message-id-${promptCallCount}`
516511
}
517512

@@ -558,10 +553,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
558553
llmCallNumber++
559554
if (llmCallNumber === 1) {
560555
// First call: agent tries to end turn without setting output
561-
yield {
562-
type: 'text' as const,
563-
text: `First response without output\n\n${getToolCallString('end_turn', {})}`,
564-
}
556+
yield { type: 'text' as const, text: 'First response without output\n\n' }
557+
yield createToolCallChunk('end_turn', {})
565558
} else if (llmCallNumber === 2) {
566559
// Second call: agent sets output after being reminded
567560
// Manually set the output to simulate the set_output tool execution
@@ -571,16 +564,14 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
571564
status: 'success',
572565
}
573566
}
574-
yield {
575-
type: 'text' as const,
576-
text: `Setting output now\n\n${getToolCallString('set_output', { result: 'test result', status: 'success' })}\n\n${getToolCallString('end_turn', {})}`,
577-
}
567+
yield { type: 'text' as const, text: 'Setting output now\n\n' }
568+
yield createToolCallChunk('set_output', { result: 'test result', status: 'success' })
569+
yield { type: 'text' as const, text: '\n\n' }
570+
yield createToolCallChunk('end_turn', {})
578571
} else {
579572
// Safety: if called more than twice, just end
580-
yield {
581-
type: 'text' as const,
582-
text: `Ending\n\n${getToolCallString('end_turn', {})}`,
583-
}
573+
yield { type: 'text' as const, text: 'Ending\n\n' }
574+
yield createToolCallChunk('end_turn', {})
584575
}
585576
return 'mock-message-id'
586577
}
@@ -641,10 +632,10 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
641632
if (capturedAgentState) {
642633
capturedAgentState.output = { result: 'success' }
643634
}
644-
yield {
645-
type: 'text' as const,
646-
text: `Setting output\n\n${getToolCallString('set_output', { result: 'success' })}\n\n${getToolCallString('end_turn', {})}`,
647-
}
635+
yield { type: 'text' as const, text: 'Setting output\n\n' }
636+
yield createToolCallChunk('set_output', { result: 'success' })
637+
yield { type: 'text' as const, text: '\n\n' }
638+
yield createToolCallChunk('end_turn', {})
648639
return 'mock-message-id'
649640
}
650641

@@ -757,10 +748,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
757748
let llmCallNumber = 0
758749
loopAgentStepsBaseParams.promptAiSdkStream = async function* ({}) {
759750
llmCallNumber++
760-
yield {
761-
type: 'text' as const,
762-
text: `Response without output\n\n${getToolCallString('end_turn', {})}`,
763-
}
751+
yield { type: 'text' as const, text: 'Response without output\n\n' }
752+
yield createToolCallChunk('end_turn', {})
764753
return 'mock-message-id'
765754
}
766755

@@ -802,19 +791,17 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
802791
llmCallNumber++
803792
if (llmCallNumber === 1) {
804793
// First call: agent does some work but doesn't end turn
805-
yield {
806-
type: 'text' as const,
807-
text: `Doing work\n\n${getToolCallString('read_files', { paths: ['test.txt'] })}`,
808-
}
794+
yield { type: 'text' as const, text: 'Doing work\n\n' }
795+
yield createToolCallChunk('read_files', { paths: ['test.txt'] })
809796
} else {
810797
// Second call: agent sets output and ends
811798
if (capturedAgentState) {
812799
capturedAgentState.output = { result: 'done' }
813800
}
814-
yield {
815-
type: 'text' as const,
816-
text: `Finishing\n\n${getToolCallString('set_output', { result: 'done' })}\n\n${getToolCallString('end_turn', {})}`,
817-
}
801+
yield { type: 'text' as const, text: 'Finishing\n\n' }
802+
yield createToolCallChunk('set_output', { result: 'done' })
803+
yield { type: 'text' as const, text: '\n\n' }
804+
yield createToolCallChunk('end_turn', {})
818805
}
819806
return 'mock-message-id'
820807
}

packages/agent-runtime/src/__tests__/main-prompt.test.ts

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import * as bigquery from '@codebuff/bigquery'
22
import * as analytics from '@codebuff/common/analytics'
33
import { TEST_USER_ID } from '@codebuff/common/old-constants'
44
import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
5-
import { getToolCallString } from '@codebuff/common/tools/utils'
65
import {
76
AgentTemplateTypes,
87
getInitialSessionState,
@@ -33,9 +32,15 @@ import type { ProjectFileContext } from '@codebuff/common/util/file'
3332

3433
let mainPromptBaseParams: ParamsExcluding<typeof mainPrompt, 'action'>
3534

36-
const mockAgentStream = (streamOutput: string) => {
35+
import { createToolCallChunk } from './test-utils'
36+
37+
import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
38+
39+
const mockAgentStream = (chunks: StreamChunk[]) => {
3740
mainPromptBaseParams.promptAiSdkStream = async function* ({}) {
38-
yield { type: 'text' as const, text: streamOutput }
41+
for (const chunk of chunks) {
42+
yield chunk
43+
}
3944
return 'mock-message-id'
4045
}
4146
}
@@ -117,7 +122,7 @@ describe('mainPrompt', () => {
117122
)
118123

119124
// Mock LLM APIs
120-
mockAgentStream('Test response')
125+
mockAgentStream([{ type: 'text', text: 'Test response' }])
121126

122127
// Mock websocket actions
123128
mainPromptBaseParams.requestFiles = async ({ filePaths }) => {
@@ -196,15 +201,15 @@ describe('mainPrompt', () => {
196201
}
197202

198203
it('should handle write_file tool call', async () => {
199-
// Mock LLM to return a write_file tool call using getToolCallString
200-
const mockResponse =
201-
getToolCallString('write_file', {
204+
// Mock LLM to return a write_file tool call using native tool call chunks
205+
mockAgentStream([
206+
createToolCallChunk('write_file', {
202207
path: 'new-file.txt',
203208
instructions: 'Added Hello World',
204209
content: 'Hello, world!',
205-
}) + getToolCallString('end_turn', {})
206-
207-
mockAgentStream(mockResponse)
210+
}),
211+
createToolCallChunk('end_turn', {}),
212+
])
208213

209214
// Get reference to the spy so we can check if it was called
210215
const requestToolCallSpy = mainPromptBaseParams.requestToolCall
@@ -355,7 +360,7 @@ describe('mainPrompt', () => {
355360

356361
it('should return no tool calls when LLM response is empty', async () => {
357362
// Mock the LLM stream to return nothing
358-
mockAgentStream('')
363+
mockAgentStream([])
359364

360365
const sessionState = getInitialSessionState(mockFileContext)
361366
const action = {
@@ -380,16 +385,15 @@ describe('mainPrompt', () => {
380385
it('should unescape ampersands in run_terminal_command tool calls', async () => {
381386
const sessionState = getInitialSessionState(mockFileContext)
382387
const userPromptText = 'Run the backend tests'
383-
const escapedCommand = 'cd backend && bun test'
384388
const expectedCommand = 'cd backend && bun test'
385389

386-
const mockResponse =
387-
getToolCallString('run_terminal_command', {
388-
command: escapedCommand,
390+
mockAgentStream([
391+
createToolCallChunk('run_terminal_command', {
392+
command: expectedCommand,
389393
process_type: 'SYNC',
390-
}) + getToolCallString('end_turn', {})
391-
392-
mockAgentStream(mockResponse)
394+
}),
395+
createToolCallChunk('end_turn', {}),
396+
])
393397

394398
// Get reference to the spy so we can check if it was called
395399
const requestToolCallSpy = mainPromptBaseParams.requestToolCall

packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import * as bigquery from '@codebuff/bigquery'
22
import * as analytics from '@codebuff/common/analytics'
33
import { TEST_USER_ID } from '@codebuff/common/old-constants'
44
import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
5-
import { getToolCallString } from '@codebuff/common/tools/utils'
65
import { getInitialSessionState } from '@codebuff/common/types/session-state'
76
import * as stringUtils from '@codebuff/common/util/string'
87
import {
@@ -15,9 +14,11 @@ import {
1514
test,
1615
} from 'bun:test'
1716

18-
import { mockFileContext } from './test-utils'
17+
import { createToolCallChunk, mockFileContext } from './test-utils'
1918
import { processStream } from '../tools/stream-parser'
2019

20+
import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
21+
2122
import type { AgentTemplate } from '../templates/types'
2223
import type {
2324
AgentRuntimeDeps,
@@ -119,22 +120,26 @@ describe('malformed tool call error handling', () => {
119120
agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL }
120121
})
121122

122-
function createMockStream(chunks: string[]) {
123+
function createMockStream(chunks: StreamChunk[]) {
123124
async function* generator() {
124125
for (const chunk of chunks) {
125-
yield { type: 'text' as const, text: chunk }
126+
yield chunk
126127
}
127128
return 'mock-message-id'
128129
}
129130
return generator()
130131
}
131132

133+
function textChunk(text: string): StreamChunk {
134+
return { type: 'text' as const, text }
135+
}
136+
132137
test('should add tool result errors to message history after stream completes', async () => {
133-
const chunks = [
134-
// Malformed JSON tool call
135-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n "paths": ["test.ts"\n}\n</codebuff_tool_call>',
136-
// Valid end turn
137-
getToolCallString('end_turn', {}),
138+
// With native tools, malformed tool calls are handled at the API level.
139+
// This test now verifies that an unknown tool is properly handled.
140+
const chunks: StreamChunk[] = [
141+
createToolCallChunk('unknown_tool_xyz', { paths: ['test.ts'] }),
142+
createToolCallChunk('end_turn', {}),
138143
]
139144

140145
const stream = createMockStream(chunks)
@@ -152,7 +157,7 @@ describe('malformed tool call error handling', () => {
152157

153158
expect(toolMessages.length).toBeGreaterThan(0)
154159

155-
// Find the error tool result
160+
// Find the error tool result for the unknown tool
156161
const errorToolResult = toolMessages.find(
157162
(m) =>
158163
m.content?.[0]?.type === 'json' &&
@@ -162,17 +167,15 @@ describe('malformed tool call error handling', () => {
162167
expect(errorToolResult).toBeDefined()
163168
expect(
164169
(errorToolResult?.content?.[0] as any)?.value?.errorMessage,
165-
).toContain('Invalid JSON')
170+
).toContain('not found')
166171
})
167172

168-
test('should handle multiple malformed tool calls', async () => {
169-
const chunks = [
170-
// First malformed call
171-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n invalid\n}\n</codebuff_tool_call>',
172-
'Some text between calls',
173-
// Second malformed call
174-
'<codebuff_tool_call>\n{\n missing_quotes: value\n}\n</codebuff_tool_call>',
175-
getToolCallString('end_turn', {}),
173+
test('should handle multiple unknown tool calls', async () => {
174+
const chunks: StreamChunk[] = [
175+
createToolCallChunk('unknown_tool_1', { param: 'value1' }),
176+
textChunk('Some text between calls'),
177+
createToolCallChunk('unknown_tool_2', { param: 'value2' }),
178+
createToolCallChunk('end_turn', {}),
176179
]
177180

178181
const stream = createMockStream(chunks)
@@ -197,9 +200,9 @@ describe('malformed tool call error handling', () => {
197200
})
198201

199202
test('should preserve original toolResults array alongside message history', async () => {
200-
const chunks = [
201-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n malformed\n}\n</codebuff_tool_call>',
202-
getToolCallString('end_turn', {}),
203+
const chunks: StreamChunk[] = [
204+
createToolCallChunk('unknown_tool_xyz', { param: 'value' }),
205+
createToolCallChunk('end_turn', {}),
203206
]
204207

205208
const stream = createMockStream(chunks)
@@ -228,9 +231,9 @@ describe('malformed tool call error handling', () => {
228231
})
229232

230233
test('should handle unknown tool names and add error to message history', async () => {
231-
const chunks = [
232-
'<codebuff_tool_call>\n{\n "cb_tool_name": "unknown_tool",\n "param": "value"\n}\n</codebuff_tool_call>',
233-
getToolCallString('end_turn', {}),
234+
const chunks: StreamChunk[] = [
235+
createToolCallChunk('unknown_tool', { param: 'value' }),
236+
createToolCallChunk('end_turn', {}),
234237
]
235238

236239
const stream = createMockStream(chunks)
@@ -258,12 +261,12 @@ describe('malformed tool call error handling', () => {
258261
})
259262

260263
test('should not affect valid tool calls in message history', async () => {
261-
const chunks = [
264+
const chunks: StreamChunk[] = [
262265
// Valid tool call
263-
getToolCallString('read_files', { paths: ['test.ts'] }),
264-
// Malformed tool call
265-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n invalid\n}\n</codebuff_tool_call>',
266-
getToolCallString('end_turn', {}),
266+
createToolCallChunk('read_files', { paths: ['test.ts'] }),
267+
// Unknown tool call
268+
createToolCallChunk('unknown_tool_xyz', { param: 'value' }),
269+
createToolCallChunk('end_turn', {}),
267270
]
268271

269272
const stream = createMockStream(chunks)
@@ -299,10 +302,10 @@ describe('malformed tool call error handling', () => {
299302
expect(errorResults.length).toBeGreaterThan(0)
300303
})
301304

302-
test('should handle stream with only malformed calls', async () => {
303-
const chunks = [
304-
'<codebuff_tool_call>\n{\n invalid1\n}\n</codebuff_tool_call>',
305-
'<codebuff_tool_call>\n{\n invalid2\n}\n</codebuff_tool_call>',
305+
test('should handle stream with only unknown tool calls', async () => {
306+
const chunks: StreamChunk[] = [
307+
createToolCallChunk('unknown_tool_1', { param: 'value1' }),
308+
createToolCallChunk('unknown_tool_2', { param: 'value2' }),
306309
]
307310

308311
const stream = createMockStream(chunks)
@@ -320,7 +323,7 @@ describe('malformed tool call error handling', () => {
320323
toolMessages.forEach((msg) => {
321324
expect(msg.content?.[0]?.type).toBe('json')
322325
expect((msg.content?.[0] as any)?.value?.errorMessage).toContain(
323-
'Invalid JSON',
326+
'not found',
324327
)
325328
})
326329
})

0 commit comments

Comments (0)