Skip to content

Commit b6da300

Browse files
committed
feat: switch to new filepicker model (test top-2 filepicker)
1 parent 269a2af commit b6da300

File tree

5 files changed

+35
-18
lines changed

5 files changed

+35
-18
lines changed

backend/src/find-files/request-files-prompt.ts

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -263,22 +263,17 @@ async function getRelevantFiles(
263263
const start = performance.now()
264264
let coreMessages = coreMessagesWithSystem(messagesWithPrompt, system)
265265

266-
if (costMode === 'experimental') {
267-
coreMessages = coreMessages
268-
.map((msg, i) => {
269-
if (msg.role === 'assistant' && i !== coreMessages.length - 1) {
270-
return castAssistantMessage(msg)
271-
} else {
272-
return msg
273-
}
274-
})
275-
.filter((msg) => msg !== null)
276-
}
277-
// This finetunedModel is used for the promptFlashWithFallbacks call
278-
const finetunedModel =
279-
costMode === 'experimental'
280-
? finetunedVertexModels.ft_filepicker_010
281-
: finetunedVertexModels.ft_filepicker_005
266+
// Converts assistant messages to user messages for finetuned model
267+
coreMessages = coreMessages
268+
.map((msg, i) => {
269+
if (msg.role === 'assistant' && i !== coreMessages.length - 1) {
270+
return castAssistantMessage(msg)
271+
} else {
272+
return msg
273+
}
274+
})
275+
.filter((msg) => msg !== null)
276+
const finetunedModel = finetunedVertexModels.ft_filepicker_010
282277

283278
let response = await promptFlashWithFallbacks(coreMessages, {
284279
clientSessionId,

common/src/constants.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ export const finetunedVertexModels = {
197197
ft_filepicker_topk_002: '1694861989844615168',
198198
ft_filepicker_010: '3808739064941641728',
199199
ft_filepicker_010_epoch_2: '6231675664466968576',
200+
ft_filepicker_topk_003: '1502192368286171136',
200201
} as const
201202
export const finetunedVertexModelNames: Record<string, string> = {
202203
[finetunedVertexModels.ft_filepicker_003]: 'ft_filepicker_003',
@@ -208,6 +209,7 @@ export const finetunedVertexModelNames: Record<string, string> = {
208209
[finetunedVertexModels.ft_filepicker_010]: 'ft_filepicker_010',
209210
[finetunedVertexModels.ft_filepicker_010_epoch_2]:
210211
'ft_filepicker_010_epoch_2',
212+
[finetunedVertexModels.ft_filepicker_topk_003]: 'ft_filepicker_topk_003',
211213
}
212214
export type FinetunedVertexModel =
213215
(typeof finetunedVertexModels)[keyof typeof finetunedVertexModels]

scripts/ft-file-selection/README.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
`ft_filepicker_010`
106106
- Gemini Endpoint ID: 3808739064941641728
107107
- Finetune date: June 2, 2025
108-
- Finetune file: ??? TODO
108+
- Finetune file: gemini-tune-data-048.jsonl
109109
- Finetune samples: 2648 messages
110110
- Tokens: 109M
111111
- Epochs: 3
@@ -121,6 +121,21 @@
121121
- ~$980 for finetuning ($3.00/1M tokens * 3 epochs * 109M tokens)
122122
- Reuses relabel inputs from ft_filepicker_008 - no additional labeling cost
123123

124+
`ft_filepicker_topk_003`
125+
- Gemini Endpoint ID: 1502192368286171136
126+
- Finetune date: June 5, 2025
127+
- Finetune file: gemini-tune-data-048-top2.jsonl, available in the GCS bucket
128+
- Finetune samples: 2648 messages
129+
- Tokens: 109M
130+
- Epochs: 2
131+
- Base model: gemini-2.0-flash-001
132+
- Distilled model: claude-opus-4-20250514
133+
- Notes:
134+
- Combines characteristics of `ft_filepicker_topk_002` and `ft_filepicker_010`, i.e., uses top-2 with 3 lines of dashes at the end, but also blobbifies message history.
135+
- Est costs:
136+
- ~$660 for finetuning ($3.00/1M tokens * 2 epochs * 109M tokens)
137+
- Mostly reuses relabel inputs from other Claude runs - no additional cost anticipated.
138+
124139
## Scripts
125140

126141
Contains a variety of scripts for inspecting and processing finetuning data from BigQuery.

scripts/ft-file-selection/print-recent-relabels.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ User ID: ${relabel.user_id}
2222
Agent Step ID: ${relabel.agent_step_id}
2323
Created at: ${JSON.stringify(relabel.created_at)}
2424
Payload: ${JSON.stringify(relabel.payload).slice(0, 100)}...
25+
Model: ${relabel.model}
26+
Output: ${JSON.stringify(relabel.payload.output)}
2527
--------------------------------`)
2628
})
2729
} catch (error) {

scripts/ft-file-selection/relabel-for-offline-scoring.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import { isValidationSample } from './collect-tuning-data'
2525
const isProd = process.argv.includes('--prod')
2626
const DATASET = isProd ? 'codebuff_data' : 'codebuff_data_dev'
2727
const MAX_PARALLEL = 5 // Maximum number of traces to process in parallel for relabeling
28-
const LIMIT = 4000 // Total limit of traces to process
28+
const LIMIT = 400 // Total limit of traces to process
2929
const START_CURSOR = '2025-05-31T00:00:00.000Z' // User-provided start cursor or default
3030

3131
const GROUND_TRUTH_MODEL = 'claude-opus-4-20250514-with-full-file-context-new'
@@ -37,6 +37,7 @@ const MODELS = [
3737
finetunedVertexModels.ft_filepicker_008,
3838
finetunedVertexModels.ft_filepicker_010,
3939
finetunedVertexModels.ft_filepicker_010_epoch_2,
40+
finetunedVertexModels.ft_filepicker_topk_003,
4041
] as const
4142

4243
const modelDescriptions = {
@@ -52,6 +53,8 @@ const modelDescriptions = {
5253
'ft_filepicker_010: 109M tokens, 3 epochs, same as ft_filepicker_008 but with assistant messages converted to user messages',
5354
[finetunedVertexModels.ft_filepicker_010_epoch_2]:
5455
'ft_filepicker_010_epoch_2: 109M tokens, 2 epochs, same as ft_filepicker_008 but with assistant messages converted to user messages',
56+
[finetunedVertexModels.ft_filepicker_topk_003]:
57+
'ft_filepicker_topk_003: 109M tokens, 2 epochs, only uses top-2 files with 3 lines of dashes at the end',
5558
}
5659

5760
async function getFilteredValidationBundles(): Promise<

0 commit comments

Comments
 (0)