Skip to content

Commit f06813c

Browse files
committed
Fix unit tests for agent-runtime!
1 parent 72c617a commit f06813c

File tree

9 files changed

+286
-545
lines changed

9 files changed

+286
-545
lines changed

packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import {
55
clearMockedModules,
66
mockModule,
77
} from '@codebuff/common/testing/mock-modules'
8-
import { getToolCallString } from '@codebuff/common/tools/utils'
98
import { getInitialSessionState } from '@codebuff/common/types/session-state'
109
import { assistantMessage, userMessage } from '@codebuff/common/util/messages'
1110
import db from '@codebuff/internal/db'
@@ -25,7 +24,7 @@ import { z } from 'zod/v4'
2524
import { disableLiveUserInputCheck } from '../live-user-inputs'
2625
import { loopAgentSteps } from '../run-agent-step'
2726
import { clearAgentGeneratorCache } from '../run-programmatic-step'
28-
import { mockFileContext } from './test-utils'
27+
import { createToolCallChunk, mockFileContext } from './test-utils'
2928

3029
import type { AgentTemplate } from '../templates/types'
3130
import type { StepGenerator } from '@codebuff/common/types/agent-template'
@@ -81,10 +80,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
8180

8281
agentRuntimeImpl.promptAiSdkStream = async function* ({}) {
8382
llmCallCount++
84-
yield {
85-
type: 'text' as const,
86-
text: `LLM response\n\n${getToolCallString('end_turn', {})}`,
87-
}
83+
yield { type: 'text' as const, text: 'LLM response\n\n' }
84+
yield createToolCallChunk('end_turn', {})
8885
return 'mock-message-id'
8986
}
9087

@@ -508,10 +505,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
508505
llmStepCount++
509506

510507
// LLM always tries to end turn
511-
yield {
512-
type: 'text' as const,
513-
text: `LLM response\n\n${getToolCallString('end_turn', {})}`,
514-
}
508+
yield { type: 'text' as const, text: 'LLM response\n\n' }
509+
yield createToolCallChunk('end_turn', {})
515510
return `mock-message-id-${promptCallCount}`
516511
}
517512

@@ -558,10 +553,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
558553
llmCallNumber++
559554
if (llmCallNumber === 1) {
560555
// First call: agent tries to end turn without setting output
561-
yield {
562-
type: 'text' as const,
563-
text: `First response without output\n\n${getToolCallString('end_turn', {})}`,
564-
}
556+
yield { type: 'text' as const, text: 'First response without output\n\n' }
557+
yield createToolCallChunk('end_turn', {})
565558
} else if (llmCallNumber === 2) {
566559
// Second call: agent sets output after being reminded
567560
// Manually set the output to simulate the set_output tool execution
@@ -571,16 +564,14 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
571564
status: 'success',
572565
}
573566
}
574-
yield {
575-
type: 'text' as const,
576-
text: `Setting output now\n\n${getToolCallString('set_output', { result: 'test result', status: 'success' })}\n\n${getToolCallString('end_turn', {})}`,
577-
}
567+
yield { type: 'text' as const, text: 'Setting output now\n\n' }
568+
yield createToolCallChunk('set_output', { result: 'test result', status: 'success' })
569+
yield { type: 'text' as const, text: '\n\n' }
570+
yield createToolCallChunk('end_turn', {})
578571
} else {
579572
// Safety: if called more than twice, just end
580-
yield {
581-
type: 'text' as const,
582-
text: `Ending\n\n${getToolCallString('end_turn', {})}`,
583-
}
573+
yield { type: 'text' as const, text: 'Ending\n\n' }
574+
yield createToolCallChunk('end_turn', {})
584575
}
585576
return 'mock-message-id'
586577
}
@@ -641,10 +632,10 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
641632
if (capturedAgentState) {
642633
capturedAgentState.output = { result: 'success' }
643634
}
644-
yield {
645-
type: 'text' as const,
646-
text: `Setting output\n\n${getToolCallString('set_output', { result: 'success' })}\n\n${getToolCallString('end_turn', {})}`,
647-
}
635+
yield { type: 'text' as const, text: 'Setting output\n\n' }
636+
yield createToolCallChunk('set_output', { result: 'success' })
637+
yield { type: 'text' as const, text: '\n\n' }
638+
yield createToolCallChunk('end_turn', {})
648639
return 'mock-message-id'
649640
}
650641

@@ -757,10 +748,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
757748
let llmCallNumber = 0
758749
loopAgentStepsBaseParams.promptAiSdkStream = async function* ({}) {
759750
llmCallNumber++
760-
yield {
761-
type: 'text' as const,
762-
text: `Response without output\n\n${getToolCallString('end_turn', {})}`,
763-
}
751+
yield { type: 'text' as const, text: 'Response without output\n\n' }
752+
yield createToolCallChunk('end_turn', {})
764753
return 'mock-message-id'
765754
}
766755

@@ -802,19 +791,17 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
802791
llmCallNumber++
803792
if (llmCallNumber === 1) {
804793
// First call: agent does some work but doesn't end turn
805-
yield {
806-
type: 'text' as const,
807-
text: `Doing work\n\n${getToolCallString('read_files', { paths: ['test.txt'] })}`,
808-
}
794+
yield { type: 'text' as const, text: 'Doing work\n\n' }
795+
yield createToolCallChunk('read_files', { paths: ['test.txt'] })
809796
} else {
810797
// Second call: agent sets output and ends
811798
if (capturedAgentState) {
812799
capturedAgentState.output = { result: 'done' }
813800
}
814-
yield {
815-
type: 'text' as const,
816-
text: `Finishing\n\n${getToolCallString('set_output', { result: 'done' })}\n\n${getToolCallString('end_turn', {})}`,
817-
}
801+
yield { type: 'text' as const, text: 'Finishing\n\n' }
802+
yield createToolCallChunk('set_output', { result: 'done' })
803+
yield { type: 'text' as const, text: '\n\n' }
804+
yield createToolCallChunk('end_turn', {})
818805
}
819806
return 'mock-message-id'
820807
}

packages/agent-runtime/src/__tests__/main-prompt.test.ts

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import * as bigquery from '@codebuff/bigquery'
22
import * as analytics from '@codebuff/common/analytics'
33
import { TEST_USER_ID } from '@codebuff/common/old-constants'
44
import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
5-
import { getToolCallString } from '@codebuff/common/tools/utils'
65
import {
76
AgentTemplateTypes,
87
getInitialSessionState,
@@ -33,9 +32,15 @@ import type { ProjectFileContext } from '@codebuff/common/util/file'
3332

3433
let mainPromptBaseParams: ParamsExcluding<typeof mainPrompt, 'action'>
3534

36-
const mockAgentStream = (streamOutput: string) => {
35+
import { createToolCallChunk } from './test-utils'
36+
37+
import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
38+
39+
const mockAgentStream = (chunks: StreamChunk[]) => {
3740
mainPromptBaseParams.promptAiSdkStream = async function* ({}) {
38-
yield { type: 'text' as const, text: streamOutput }
41+
for (const chunk of chunks) {
42+
yield chunk
43+
}
3944
return 'mock-message-id'
4045
}
4146
}
@@ -117,7 +122,7 @@ describe('mainPrompt', () => {
117122
)
118123

119124
// Mock LLM APIs
120-
mockAgentStream('Test response')
125+
mockAgentStream([{ type: 'text', text: 'Test response' }])
121126

122127
// Mock websocket actions
123128
mainPromptBaseParams.requestFiles = async ({ filePaths }) => {
@@ -196,15 +201,15 @@ describe('mainPrompt', () => {
196201
}
197202

198203
it('should handle write_file tool call', async () => {
199-
// Mock LLM to return a write_file tool call using getToolCallString
200-
const mockResponse =
201-
getToolCallString('write_file', {
204+
// Mock LLM to return a write_file tool call using native tool call chunks
205+
mockAgentStream([
206+
createToolCallChunk('write_file', {
202207
path: 'new-file.txt',
203208
instructions: 'Added Hello World',
204209
content: 'Hello, world!',
205-
}) + getToolCallString('end_turn', {})
206-
207-
mockAgentStream(mockResponse)
210+
}),
211+
createToolCallChunk('end_turn', {}),
212+
])
208213

209214
// Get reference to the spy so we can check if it was called
210215
const requestToolCallSpy = mainPromptBaseParams.requestToolCall
@@ -355,7 +360,7 @@ describe('mainPrompt', () => {
355360

356361
it('should return no tool calls when LLM response is empty', async () => {
357362
// Mock the LLM stream to return nothing
358-
mockAgentStream('')
363+
mockAgentStream([])
359364

360365
const sessionState = getInitialSessionState(mockFileContext)
361366
const action = {
@@ -380,16 +385,15 @@ describe('mainPrompt', () => {
380385
it('should unescape ampersands in run_terminal_command tool calls', async () => {
381386
const sessionState = getInitialSessionState(mockFileContext)
382387
const userPromptText = 'Run the backend tests'
383-
const escapedCommand = 'cd backend && bun test'
384388
const expectedCommand = 'cd backend && bun test'
385389

386-
const mockResponse =
387-
getToolCallString('run_terminal_command', {
388-
command: escapedCommand,
390+
mockAgentStream([
391+
createToolCallChunk('run_terminal_command', {
392+
command: expectedCommand,
389393
process_type: 'SYNC',
390-
}) + getToolCallString('end_turn', {})
391-
392-
mockAgentStream(mockResponse)
394+
}),
395+
createToolCallChunk('end_turn', {}),
396+
])
393397

394398
// Get reference to the spy so we can check if it was called
395399
const requestToolCallSpy = mainPromptBaseParams.requestToolCall

packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import * as bigquery from '@codebuff/bigquery'
22
import * as analytics from '@codebuff/common/analytics'
33
import { TEST_USER_ID } from '@codebuff/common/old-constants'
44
import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
5-
import { getToolCallString } from '@codebuff/common/tools/utils'
65
import { getInitialSessionState } from '@codebuff/common/types/session-state'
76
import * as stringUtils from '@codebuff/common/util/string'
87
import {
@@ -15,9 +14,11 @@ import {
1514
test,
1615
} from 'bun:test'
1716

18-
import { mockFileContext } from './test-utils'
17+
import { createToolCallChunk, mockFileContext } from './test-utils'
1918
import { processStream } from '../tools/stream-parser'
2019

20+
import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
21+
2122
import type { AgentTemplate } from '../templates/types'
2223
import type {
2324
AgentRuntimeDeps,
@@ -119,22 +120,26 @@ describe('malformed tool call error handling', () => {
119120
agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL }
120121
})
121122

122-
function createMockStream(chunks: string[]) {
123+
function createMockStream(chunks: StreamChunk[]) {
123124
async function* generator() {
124125
for (const chunk of chunks) {
125-
yield { type: 'text' as const, text: chunk }
126+
yield chunk
126127
}
127128
return 'mock-message-id'
128129
}
129130
return generator()
130131
}
131132

133+
function textChunk(text: string): StreamChunk {
134+
return { type: 'text' as const, text }
135+
}
136+
132137
test('should add tool result errors to message history after stream completes', async () => {
133-
const chunks = [
134-
// Malformed JSON tool call
135-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n "paths": ["test.ts"\n}\n</codebuff_tool_call>',
136-
// Valid end turn
137-
getToolCallString('end_turn', {}),
138+
// With native tools, malformed tool calls are handled at the API level.
139+
// This test now verifies that an unknown tool is properly handled.
140+
const chunks: StreamChunk[] = [
141+
createToolCallChunk('unknown_tool_xyz', { paths: ['test.ts'] }),
142+
createToolCallChunk('end_turn', {}),
138143
]
139144

140145
const stream = createMockStream(chunks)
@@ -152,7 +157,7 @@ describe('malformed tool call error handling', () => {
152157

153158
expect(toolMessages.length).toBeGreaterThan(0)
154159

155-
// Find the error tool result
160+
// Find the error tool result for the unknown tool
156161
const errorToolResult = toolMessages.find(
157162
(m) =>
158163
m.content?.[0]?.type === 'json' &&
@@ -162,17 +167,15 @@ describe('malformed tool call error handling', () => {
162167
expect(errorToolResult).toBeDefined()
163168
expect(
164169
(errorToolResult?.content?.[0] as any)?.value?.errorMessage,
165-
).toContain('Invalid JSON')
170+
).toContain('not found')
166171
})
167172

168-
test('should handle multiple malformed tool calls', async () => {
169-
const chunks = [
170-
// First malformed call
171-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n invalid\n}\n</codebuff_tool_call>',
172-
'Some text between calls',
173-
// Second malformed call
174-
'<codebuff_tool_call>\n{\n missing_quotes: value\n}\n</codebuff_tool_call>',
175-
getToolCallString('end_turn', {}),
173+
test('should handle multiple unknown tool calls', async () => {
174+
const chunks: StreamChunk[] = [
175+
createToolCallChunk('unknown_tool_1', { param: 'value1' }),
176+
textChunk('Some text between calls'),
177+
createToolCallChunk('unknown_tool_2', { param: 'value2' }),
178+
createToolCallChunk('end_turn', {}),
176179
]
177180

178181
const stream = createMockStream(chunks)
@@ -197,9 +200,9 @@ describe('malformed tool call error handling', () => {
197200
})
198201

199202
test('should preserve original toolResults array alongside message history', async () => {
200-
const chunks = [
201-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n malformed\n}\n</codebuff_tool_call>',
202-
getToolCallString('end_turn', {}),
203+
const chunks: StreamChunk[] = [
204+
createToolCallChunk('unknown_tool_xyz', { param: 'value' }),
205+
createToolCallChunk('end_turn', {}),
203206
]
204207

205208
const stream = createMockStream(chunks)
@@ -228,9 +231,9 @@ describe('malformed tool call error handling', () => {
228231
})
229232

230233
test('should handle unknown tool names and add error to message history', async () => {
231-
const chunks = [
232-
'<codebuff_tool_call>\n{\n "cb_tool_name": "unknown_tool",\n "param": "value"\n}\n</codebuff_tool_call>',
233-
getToolCallString('end_turn', {}),
234+
const chunks: StreamChunk[] = [
235+
createToolCallChunk('unknown_tool', { param: 'value' }),
236+
createToolCallChunk('end_turn', {}),
234237
]
235238

236239
const stream = createMockStream(chunks)
@@ -258,12 +261,12 @@ describe('malformed tool call error handling', () => {
258261
})
259262

260263
test('should not affect valid tool calls in message history', async () => {
261-
const chunks = [
264+
const chunks: StreamChunk[] = [
262265
// Valid tool call
263-
getToolCallString('read_files', { paths: ['test.ts'] }),
264-
// Malformed tool call
265-
'<codebuff_tool_call>\n{\n "cb_tool_name": "read_files",\n invalid\n}\n</codebuff_tool_call>',
266-
getToolCallString('end_turn', {}),
266+
createToolCallChunk('read_files', { paths: ['test.ts'] }),
267+
// Unknown tool call
268+
createToolCallChunk('unknown_tool_xyz', { param: 'value' }),
269+
createToolCallChunk('end_turn', {}),
267270
]
268271

269272
const stream = createMockStream(chunks)
@@ -299,10 +302,10 @@ describe('malformed tool call error handling', () => {
299302
expect(errorResults.length).toBeGreaterThan(0)
300303
})
301304

302-
test('should handle stream with only malformed calls', async () => {
303-
const chunks = [
304-
'<codebuff_tool_call>\n{\n invalid1\n}\n</codebuff_tool_call>',
305-
'<codebuff_tool_call>\n{\n invalid2\n}\n</codebuff_tool_call>',
305+
test('should handle stream with only unknown tool calls', async () => {
306+
const chunks: StreamChunk[] = [
307+
createToolCallChunk('unknown_tool_1', { param: 'value1' }),
308+
createToolCallChunk('unknown_tool_2', { param: 'value2' }),
306309
]
307310

308311
const stream = createMockStream(chunks)
@@ -320,7 +323,7 @@ describe('malformed tool call error handling', () => {
320323
toolMessages.forEach((msg) => {
321324
expect(msg.content?.[0]?.type).toBe('json')
322325
expect((msg.content?.[0] as any)?.value?.errorMessage).toContain(
323-
'Invalid JSON',
326+
'not found',
324327
)
325328
})
326329
})

0 commit comments

Comments (0)