Skip to content

Commit 6aa811c

Browse files
committed
refactor: remove Gemini flash dynamic-thinking model as it's enabled by default
Also splits up the providers into individual files for better maintenance.
1 parent 22fb5f6 commit 6aa811c

File tree

7 files changed

+210
-133
lines changed

7 files changed

+210
-133
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import {AnthropicProviderOptions} from '@ai-sdk/anthropic';
2+
import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
3+
import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
4+
import {LanguageModel} from 'ai';
5+
6+
/**
 * Resolved configuration for a single AI SDK model: the language model
 * instance plus the provider-specific options to send with each request.
 */
export type ModelOptions = {
  model: LanguageModel;
  // Exactly one provider's options, keyed by the provider name the AI SDK
  // expects in `providerOptions`.
  providerOptions:
    | {anthropic: AnthropicProviderOptions}
    | {google: GoogleGenerativeAIProviderOptions}
    | {openai: OpenAIResponsesProviderOptions};
};

runner/codegen/ai-sdk-runner.ts renamed to runner/codegen/ai-sdk/ai-sdk-runner.ts

Lines changed: 26 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
1-
import {
2-
LlmRunner,
3-
LocalLlmConstrainedOutputGenerateRequestOptions,
4-
LocalLlmConstrainedOutputGenerateResponse,
5-
LocalLlmGenerateFilesRequestOptions,
6-
LocalLlmGenerateFilesResponse,
7-
LocalLlmGenerateTextRequestOptions,
8-
LocalLlmGenerateTextResponse,
9-
PromptDataMessage,
10-
} from './llm-runner.js';
1+
import {AnthropicProviderOptions} from '@ai-sdk/anthropic';
2+
import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
3+
import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
114
import {
125
FilePart,
136
generateObject,
@@ -16,43 +9,31 @@ import {
169
ModelMessage,
1710
SystemModelMessage,
1811
TextPart,
19-
wrapLanguageModel,
2012
} from 'ai';
21-
import {google, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
22-
import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic';
23-
import {openai, OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
2413
import z from 'zod';
25-
import {callWithTimeout} from '../utils/timeout.js';
26-
import {combineAbortSignals} from '../utils/abort-signal.js';
27-
import {anthropicThinkingWithStructuredResponseMiddleware} from './ai-sdk-claude-thinking-patch.js';
14+
import {combineAbortSignals} from '../../utils/abort-signal.js';
15+
import {callWithTimeout} from '../../utils/timeout.js';
16+
import {
17+
LlmRunner,
18+
LocalLlmConstrainedOutputGenerateRequestOptions,
19+
LocalLlmConstrainedOutputGenerateResponse,
20+
LocalLlmGenerateFilesRequestOptions,
21+
LocalLlmGenerateFilesResponse,
22+
LocalLlmGenerateTextRequestOptions,
23+
LocalLlmGenerateTextResponse,
24+
PromptDataMessage,
25+
} from '../llm-runner.js';
26+
import {ANTHROPIC_MODELS, getAiSdkModelOptionsForAnthropic} from './anthropic.js';
27+
import {getAiSdkModelOptionsForGoogle, GOOGLE_MODELS} from './google.js';
28+
import {getAiSdkModelOptionsForOpenAI, OPENAI_MODELS} from './openai.js';
2829

29-
const SUPPORTED_MODELS = [
30-
'claude-opus-4.1-no-thinking',
31-
'claude-opus-4.1-with-thinking-16k',
32-
'claude-opus-4.1-with-thinking-32k',
33-
'claude-sonnet-4.5-no-thinking',
34-
'claude-sonnet-4.5-with-thinking-16k',
35-
'claude-sonnet-4.5-with-thinking-32k',
36-
'gemini-2.5-flash-lite',
37-
'gemini-2.5-flash',
38-
'gemini-2.5-flash-with-thinking-dynamic',
39-
'gemini-2.5-flash-with-thinking-16k',
40-
'gemini-2.5-flash-with-thinking-24k',
41-
'gemini-2.5-pro',
42-
'gemini-3-pro-preview',
43-
'gpt-5.1-no-thinking',
44-
'gpt-5.1-thinking-low',
45-
'gpt-5.1-thinking-high',
46-
'gpt-5.1-thinking-medium',
47-
] as const;
30+
// Every model name the AI SDK runner accepts, aggregated from the
// per-provider model lists.
const SUPPORTED_MODELS = [...GOOGLE_MODELS, ...ANTHROPIC_MODELS, ...OPENAI_MODELS] as const;
4831

4932
// Increased to a very high value as we rely on an actual timeout
5033
// that aborts stuck LLM requests. WCS is targeting stability here;
5134
// even if it involves many exponential backoff waits.
5235
const DEFAULT_MAX_RETRIES = 100000;
5336

54-
const claude16kThinkingTokenBudget = 16_000;
55-
const claude32kThinkingTokenBudget = 32_000;
5637
export class AiSDKRunner implements LlmRunner {
5738
displayName = 'AI SDK';
5839
id = 'ai-sdk';
@@ -164,100 +145,14 @@ export class AiSDKRunner implements LlmRunner {
164145
| {google: GoogleGenerativeAIProviderOptions}
165146
| {openai: OpenAIResponsesProviderOptions};
166147
}> {
167-
const modelName = request.model as (typeof SUPPORTED_MODELS)[number];
168-
switch (modelName) {
169-
case 'claude-opus-4.1-no-thinking':
170-
case 'claude-opus-4.1-with-thinking-16k':
171-
case 'claude-opus-4.1-with-thinking-32k':
172-
case 'claude-sonnet-4.5-no-thinking':
173-
case 'claude-sonnet-4.5-with-thinking-16k':
174-
case 'claude-sonnet-4.5-with-thinking-32k': {
175-
const thinkingEnabled = modelName.includes('-with-thinking');
176-
const thinkingBudget = !thinkingEnabled
177-
? undefined
178-
: modelName.endsWith('-32k')
179-
? claude32kThinkingTokenBudget
180-
: claude16kThinkingTokenBudget;
181-
const isOpus4_1Model = modelName.includes('opus-4.1');
182-
const model = anthropic(isOpus4_1Model ? 'claude-opus-4-1' : 'claude-sonnet-4-5');
183-
return {
184-
model: thinkingEnabled
185-
? wrapLanguageModel({
186-
model,
187-
middleware: anthropicThinkingWithStructuredResponseMiddleware,
188-
})
189-
: model,
190-
providerOptions: {
191-
anthropic: {
192-
sendReasoning: thinkingEnabled,
193-
thinking: {
194-
type: thinkingEnabled ? 'enabled' : 'disabled',
195-
budgetTokens: thinkingBudget,
196-
},
197-
} satisfies AnthropicProviderOptions,
198-
},
199-
};
200-
}
201-
case 'gemini-2.5-flash-lite':
202-
case 'gemini-2.5-flash':
203-
case 'gemini-2.5-pro':
204-
case 'gemini-3-pro-preview':
205-
return {
206-
model: google(modelName),
207-
providerOptions: {
208-
google: {
209-
thinkingConfig: {
210-
includeThoughts: request.thinkingConfig?.includeThoughts,
211-
},
212-
} satisfies GoogleGenerativeAIProviderOptions,
213-
},
214-
};
215-
case 'gemini-2.5-flash-with-thinking-dynamic':
216-
case 'gemini-2.5-flash-with-thinking-16k':
217-
case 'gemini-2.5-flash-with-thinking-24k':
218-
// -1 means "dynamic thinking budget":
219-
// https://ai.google.dev/gemini-api/docs/thinking#set-budget.
220-
let thinkingBudget = -1;
221-
if (modelName.endsWith('-16k')) {
222-
thinkingBudget = 16_000;
223-
} else if (modelName.endsWith('-24k')) {
224-
thinkingBudget = 24_000;
225-
}
226-
return {
227-
model: google('gemini-2.5-flash'),
228-
providerOptions: {
229-
google: {
230-
thinkingConfig: {
231-
thinkingBudget: thinkingBudget,
232-
includeThoughts: true,
233-
},
234-
} satisfies GoogleGenerativeAIProviderOptions,
235-
},
236-
};
237-
case 'gpt-5.1-no-thinking':
238-
case 'gpt-5.1-thinking-low':
239-
case 'gpt-5.1-thinking-medium':
240-
case 'gpt-5.1-thinking-high':
241-
let reasoningEffort: string = 'none';
242-
if (modelName === 'gpt-5.1-thinking-high') {
243-
reasoningEffort = 'high';
244-
} else if (modelName === 'gpt-5.1-thinking-medium') {
245-
reasoningEffort = 'medium';
246-
} else if (modelName === 'gpt-5.1-thinking-low') {
247-
reasoningEffort = 'low';
248-
}
249-
return {
250-
model: openai('gpt-5.1'),
251-
providerOptions: {
252-
openai: {
253-
reasoningEffort,
254-
reasoningSummary: 'detailed',
255-
} satisfies OpenAIResponsesProviderOptions,
256-
},
257-
};
258-
default:
259-
throw new Error(`Unexpected model in AI SDK runner: ${request.model}.`);
148+
const result =
149+
(await getAiSdkModelOptionsForGoogle(request.model)) ??
150+
(await getAiSdkModelOptionsForAnthropic(request.model)) ??
151+
(await getAiSdkModelOptionsForOpenAI(request.model));
152+
if (result === null) {
153+
throw new Error(`Unexpected unsupported model: ${request.model}`);
260154
}
155+
return result;
261156
}
262157

263158
private _convertRequestToMessagesList(

runner/codegen/ai-sdk/anthropic.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic';
2+
import {wrapLanguageModel} from 'ai';
3+
import {anthropicThinkingWithStructuredResponseMiddleware} from './anthropic_thinking_patch.js';
4+
import {ModelOptions} from './ai-sdk-model-options.js';
5+
6+
/** Model identifiers handled by the Anthropic AI SDK provider. */
export const ANTHROPIC_MODELS = [
  'claude-opus-4.1-no-thinking',
  'claude-opus-4.1-with-thinking-16k',
  'claude-opus-4.1-with-thinking-32k',
  'claude-sonnet-4.5-no-thinking',
  'claude-sonnet-4.5-with-thinking-16k',
  'claude-sonnet-4.5-with-thinking-32k',
] as const;
14+
15+
export async function getAiSdkModelOptionsForAnthropic(
16+
rawModelName: string,
17+
): Promise<ModelOptions | null> {
18+
const modelName = rawModelName as (typeof ANTHROPIC_MODELS)[number];
19+
20+
switch (modelName) {
21+
case 'claude-opus-4.1-no-thinking':
22+
case 'claude-opus-4.1-with-thinking-16k':
23+
case 'claude-opus-4.1-with-thinking-32k':
24+
case 'claude-sonnet-4.5-no-thinking':
25+
case 'claude-sonnet-4.5-with-thinking-16k':
26+
case 'claude-sonnet-4.5-with-thinking-32k': {
27+
const thinkingEnabled = modelName.includes('-with-thinking');
28+
const thinkingBudget = !thinkingEnabled
29+
? undefined
30+
: modelName.endsWith('-32k')
31+
? 32_000
32+
: 16_000;
33+
const isOpus4_1Model = modelName.includes('opus-4.1');
34+
const model = anthropic(isOpus4_1Model ? 'claude-opus-4-1' : 'claude-sonnet-4-5');
35+
return {
36+
model: thinkingEnabled
37+
? wrapLanguageModel({
38+
model,
39+
middleware: anthropicThinkingWithStructuredResponseMiddleware,
40+
})
41+
: model,
42+
providerOptions: {
43+
anthropic: {
44+
sendReasoning: thinkingEnabled,
45+
thinking: {
46+
type: thinkingEnabled ? 'enabled' : 'disabled',
47+
budgetTokens: thinkingBudget,
48+
},
49+
} satisfies AnthropicProviderOptions,
50+
},
51+
};
52+
}
53+
default:
54+
return null;
55+
}
56+
}

runner/codegen/ai-sdk/google.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import {google, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
2+
import {ModelOptions} from './ai-sdk-model-options.js';
3+
4+
/** Model identifiers handled by the Google Gemini AI SDK provider. */
export const GOOGLE_MODELS = [
  'gemini-2.5-flash-lite',
  'gemini-2.5-flash',
  'gemini-2.5-flash-no-thinking',
  'gemini-2.5-flash-with-thinking-16k',
  'gemini-2.5-flash-with-thinking-24k',
  'gemini-2.5-pro',
  'gemini-3-pro-preview',
] as const;
13+
14+
export async function getAiSdkModelOptionsForGoogle(
15+
rawModelName: string,
16+
): Promise<ModelOptions | null> {
17+
const modelName = rawModelName as (typeof GOOGLE_MODELS)[number];
18+
19+
switch (modelName) {
20+
case 'gemini-2.5-flash-lite':
21+
case 'gemini-2.5-flash':
22+
case 'gemini-2.5-pro':
23+
case 'gemini-3-pro-preview':
24+
return {
25+
model: google(modelName),
26+
providerOptions: {
27+
google: {
28+
thinkingConfig: {
29+
includeThoughts: true,
30+
},
31+
} satisfies GoogleGenerativeAIProviderOptions,
32+
},
33+
};
34+
case 'gemini-2.5-flash-no-thinking': {
35+
return {
36+
model: google('gemini-2.5-flash'),
37+
providerOptions: {
38+
google: {
39+
thinkingConfig: {
40+
thinkingBudget: 0,
41+
},
42+
},
43+
},
44+
};
45+
}
46+
case 'gemini-2.5-flash-with-thinking-16k':
47+
case 'gemini-2.5-flash-with-thinking-24k':
48+
let thinkingBudget: number;
49+
if (modelName.endsWith('-16k')) {
50+
thinkingBudget = 16_000;
51+
} else if (modelName.endsWith('-24k')) {
52+
thinkingBudget = 24_000;
53+
} else {
54+
throw new Error(`Unexpected model: ${modelName}`);
55+
}
56+
57+
return {
58+
model: google('gemini-2.5-flash'),
59+
providerOptions: {
60+
google: {
61+
thinkingConfig: {
62+
thinkingBudget: thinkingBudget,
63+
includeThoughts: true,
64+
},
65+
} satisfies GoogleGenerativeAIProviderOptions,
66+
},
67+
};
68+
default:
69+
return null;
70+
}
71+
}

runner/codegen/ai-sdk/openai.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import {openai, OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
2+
import {ModelOptions} from './ai-sdk-model-options.js';
3+
4+
/** Model identifiers handled by the OpenAI AI SDK provider. */
export const OPENAI_MODELS = [
  'gpt-5.1-no-thinking',
  'gpt-5.1-thinking-low',
  'gpt-5.1-thinking-high',
  'gpt-5.1-thinking-medium',
] as const;
10+
11+
export async function getAiSdkModelOptionsForOpenAI(
12+
rawModelName: string,
13+
): Promise<ModelOptions | null> {
14+
const modelName = rawModelName as (typeof OPENAI_MODELS)[number];
15+
16+
switch (modelName) {
17+
case 'gpt-5.1-no-thinking':
18+
case 'gpt-5.1-thinking-low':
19+
case 'gpt-5.1-thinking-medium':
20+
case 'gpt-5.1-thinking-high':
21+
let reasoningEffort: string = 'none';
22+
if (modelName === 'gpt-5.1-thinking-high') {
23+
reasoningEffort = 'high';
24+
} else if (modelName === 'gpt-5.1-thinking-medium') {
25+
reasoningEffort = 'medium';
26+
} else if (modelName === 'gpt-5.1-thinking-low') {
27+
reasoningEffort = 'low';
28+
}
29+
return {
30+
model: openai('gpt-5.1'),
31+
providerOptions: {
32+
openai: {
33+
reasoningEffort,
34+
reasoningSummary: 'detailed',
35+
} satisfies OpenAIResponsesProviderOptions,
36+
},
37+
};
38+
default:
39+
return null;
40+
}
41+
}

runner/codegen/runner-creation.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import type {ClaudeCodeRunner} from './claude-code-runner.js';
44
import type {GenkitRunner} from './genkit/genkit-runner.js';
55
import type {CodexRunner} from './codex-runner.js';
66
import type {NoopUnimplementedRunner} from './noop-unimplemented-runner.js';
7-
import {AiSDKRunner} from './ai-sdk-runner.js';
7+
import {AiSDKRunner} from './ai-sdk/ai-sdk-runner.js';
88

99
interface AvailableRunners {
1010
genkit: GenkitRunner;
@@ -30,7 +30,9 @@ export async function getRunnerByName<T extends RunnerName>(name: T): Promise<Av
3030
m => new m.GenkitRunner() as AvailableRunners[T],
3131
);
3232
case 'ai-sdk':
33-
return import('./ai-sdk-runner.js').then(m => new m.AiSDKRunner() as AvailableRunners[T]);
33+
return import('./ai-sdk/ai-sdk-runner.js').then(
34+
m => new m.AiSDKRunner() as AvailableRunners[T],
35+
);
3436
case 'gemini-cli':
3537
return import('./gemini-cli-runner.js').then(
3638
m => new m.GeminiCliRunner() as AvailableRunners[T],

0 commit comments

Comments
 (0)