Skip to content

Commit 6aa811c

Browse files
committed
refactor: remove Gemini flash dynamic-thinking model as it's enabled by default
Also splits up the providers into individual files for better maintenance.
1 parent 22fb5f6 commit 6aa811c

File tree

7 files changed

+210
-133
lines changed

7 files changed

+210
-133
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import {AnthropicProviderOptions} from '@ai-sdk/anthropic';
2+
import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
3+
import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
4+
import {LanguageModel} from 'ai';
5+
6+
/**
 * Resolved configuration for a single AI SDK model: the language model
 * instance plus the provider-specific options to send with each request.
 */
export type ModelOptions = {
  model: LanguageModel;
  // Exactly one provider's options, keyed by the provider name the AI SDK
  // expects in `providerOptions`.
  providerOptions:
    | {anthropic: AnthropicProviderOptions}
    | {google: GoogleGenerativeAIProviderOptions}
    | {openai: OpenAIResponsesProviderOptions};
};

runner/codegen/ai-sdk-runner.ts renamed to runner/codegen/ai-sdk/ai-sdk-runner.ts

Lines changed: 26 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
1-
import {
2-
LlmRunner,
3-
LocalLlmConstrainedOutputGenerateRequestOptions,
4-
LocalLlmConstrainedOutputGenerateResponse,
5-
LocalLlmGenerateFilesRequestOptions,
6-
LocalLlmGenerateFilesResponse,
7-
LocalLlmGenerateTextRequestOptions,
8-
LocalLlmGenerateTextResponse,
9-
PromptDataMessage,
10-
} from './llm-runner.js';
1+
import {AnthropicProviderOptions} from '@ai-sdk/anthropic';
2+
import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
3+
import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
114
import {
125
FilePart,
136
generateObject,
@@ -16,43 +9,31 @@ import {
169
ModelMessage,
1710
SystemModelMessage,
1811
TextPart,
19-
wrapLanguageModel,
2012
} from 'ai';
21-
import {google, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
22-
import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic';
23-
import {openai, OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
2413
import z from 'zod';
25-
import {callWithTimeout} from '../utils/timeout.js';
26-
import {combineAbortSignals} from '../utils/abort-signal.js';
27-
import {anthropicThinkingWithStructuredResponseMiddleware} from './ai-sdk-claude-thinking-patch.js';
14+
import {combineAbortSignals} from '../../utils/abort-signal.js';
15+
import {callWithTimeout} from '../../utils/timeout.js';
16+
import {
17+
LlmRunner,
18+
LocalLlmConstrainedOutputGenerateRequestOptions,
19+
LocalLlmConstrainedOutputGenerateResponse,
20+
LocalLlmGenerateFilesRequestOptions,
21+
LocalLlmGenerateFilesResponse,
22+
LocalLlmGenerateTextRequestOptions,
23+
LocalLlmGenerateTextResponse,
24+
PromptDataMessage,
25+
} from '../llm-runner.js';
26+
import {ANTHROPIC_MODELS, getAiSdkModelOptionsForAnthropic} from './anthropic.js';
27+
import {getAiSdkModelOptionsForGoogle, GOOGLE_MODELS} from './google.js';
28+
import {getAiSdkModelOptionsForOpenAI, OPENAI_MODELS} from './openai.js';
2829

29-
const SUPPORTED_MODELS = [
30-
'claude-opus-4.1-no-thinking',
31-
'claude-opus-4.1-with-thinking-16k',
32-
'claude-opus-4.1-with-thinking-32k',
33-
'claude-sonnet-4.5-no-thinking',
34-
'claude-sonnet-4.5-with-thinking-16k',
35-
'claude-sonnet-4.5-with-thinking-32k',
36-
'gemini-2.5-flash-lite',
37-
'gemini-2.5-flash',
38-
'gemini-2.5-flash-with-thinking-dynamic',
39-
'gemini-2.5-flash-with-thinking-16k',
40-
'gemini-2.5-flash-with-thinking-24k',
41-
'gemini-2.5-pro',
42-
'gemini-3-pro-preview',
43-
'gpt-5.1-no-thinking',
44-
'gpt-5.1-thinking-low',
45-
'gpt-5.1-thinking-high',
46-
'gpt-5.1-thinking-medium',
47-
] as const;
30+
// Every model name the AI SDK runner accepts, aggregated from the
// per-provider model lists.
const SUPPORTED_MODELS = [...GOOGLE_MODELS, ...ANTHROPIC_MODELS, ...OPENAI_MODELS] as const;
4831

4932
// Increased to a very high value as we rely on an actual timeout
5033
// that aborts stuck LLM requests. WCS is targeting stability here;
5134
// even if it involves many exponential backoff waits.
5235
const DEFAULT_MAX_RETRIES = 100000;
5336

54-
const claude16kThinkingTokenBudget = 16_000;
55-
const claude32kThinkingTokenBudget = 32_000;
5637
export class AiSDKRunner implements LlmRunner {
5738
displayName = 'AI SDK';
5839
id = 'ai-sdk';
@@ -164,100 +145,14 @@ export class AiSDKRunner implements LlmRunner {
164145
| {google: GoogleGenerativeAIProviderOptions}
165146
| {openai: OpenAIResponsesProviderOptions};
166147
}> {
167-
const modelName = request.model as (typeof SUPPORTED_MODELS)[number];
168-
switch (modelName) {
169-
case 'claude-opus-4.1-no-thinking':
170-
case 'claude-opus-4.1-with-thinking-16k':
171-
case 'claude-opus-4.1-with-thinking-32k':
172-
case 'claude-sonnet-4.5-no-thinking':
173-
case 'claude-sonnet-4.5-with-thinking-16k':
174-
case 'claude-sonnet-4.5-with-thinking-32k': {
175-
const thinkingEnabled = modelName.includes('-with-thinking');
176-
const thinkingBudget = !thinkingEnabled
177-
? undefined
178-
: modelName.endsWith('-32k')
179-
? claude32kThinkingTokenBudget
180-
: claude16kThinkingTokenBudget;
181-
const isOpus4_1Model = modelName.includes('opus-4.1');
182-
const model = anthropic(isOpus4_1Model ? 'claude-opus-4-1' : 'claude-sonnet-4-5');
183-
return {
184-
model: thinkingEnabled
185-
? wrapLanguageModel({
186-
model,
187-
middleware: anthropicThinkingWithStructuredResponseMiddleware,
188-
})
189-
: model,
190-
providerOptions: {
191-
anthropic: {
192-
sendReasoning: thinkingEnabled,
193-
thinking: {
194-
type: thinkingEnabled ? 'enabled' : 'disabled',
195-
budgetTokens: thinkingBudget,
196-
},
197-
} satisfies AnthropicProviderOptions,
198-
},
199-
};
200-
}
201-
case 'gemini-2.5-flash-lite':
202-
case 'gemini-2.5-flash':
203-
case 'gemini-2.5-pro':
204-
case 'gemini-3-pro-preview':
205-
return {
206-
model: google(modelName),
207-
providerOptions: {
208-
google: {
209-
thinkingConfig: {
210-
includeThoughts: request.thinkingConfig?.includeThoughts,
211-
},
212-
} satisfies GoogleGenerativeAIProviderOptions,
213-
},
214-
};
215-
case 'gemini-2.5-flash-with-thinking-dynamic':
216-
case 'gemini-2.5-flash-with-thinking-16k':
217-
case 'gemini-2.5-flash-with-thinking-24k':
218-
// -1 means "dynamic thinking budget":
219-
// https://ai.google.dev/gemini-api/docs/thinking#set-budget.
220-
let thinkingBudget = -1;
221-
if (modelName.endsWith('-16k')) {
222-
thinkingBudget = 16_000;
223-
} else if (modelName.endsWith('-24k')) {
224-
thinkingBudget = 24_000;
225-
}
226-
return {
227-
model: google('gemini-2.5-flash'),
228-
providerOptions: {
229-
google: {
230-
thinkingConfig: {
231-
thinkingBudget: thinkingBudget,
232-
includeThoughts: true,
233-
},
234-
} satisfies GoogleGenerativeAIProviderOptions,
235-
},
236-
};
237-
case 'gpt-5.1-no-thinking':
238-
case 'gpt-5.1-thinking-low':
239-
case 'gpt-5.1-thinking-medium':
240-
case 'gpt-5.1-thinking-high':
241-
let reasoningEffort: string = 'none';
242-
if (modelName === 'gpt-5.1-thinking-high') {
243-
reasoningEffort = 'high';
244-
} else if (modelName === 'gpt-5.1-thinking-medium') {
245-
reasoningEffort = 'medium';
246-
} else if (modelName === 'gpt-5.1-thinking-low') {
247-
reasoningEffort = 'low';
248-
}
249-
return {
250-
model: openai('gpt-5.1'),
251-
providerOptions: {
252-
openai: {
253-
reasoningEffort,
254-
reasoningSummary: 'detailed',
255-
} satisfies OpenAIResponsesProviderOptions,
256-
},
257-
};
258-
default:
259-
throw new Error(`Unexpected model in AI SDK runner: ${request.model}.`);
148+
const result =
149+
(await getAiSdkModelOptionsForGoogle(request.model)) ??
150+
(await getAiSdkModelOptionsForAnthropic(request.model)) ??
151+
(await getAiSdkModelOptionsForOpenAI(request.model));
152+
if (result === null) {
153+
throw new Error(`Unexpected unsupported model: ${request.model}`);
260154
}
155+
return result;
261156
}
262157

263158
private _convertRequestToMessagesList(

runner/codegen/ai-sdk/anthropic.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic';
2+
import {wrapLanguageModel} from 'ai';
3+
import {anthropicThinkingWithStructuredResponseMiddleware} from './anthropic_thinking_patch.js';
4+
import {ModelOptions} from './ai-sdk-model-options.js';
5+
6+
/** Model identifiers handled by the Anthropic AI SDK provider. */
export const ANTHROPIC_MODELS = [
  'claude-opus-4.1-no-thinking',
  'claude-opus-4.1-with-thinking-16k',
  'claude-opus-4.1-with-thinking-32k',
  'claude-sonnet-4.5-no-thinking',
  'claude-sonnet-4.5-with-thinking-16k',
  'claude-sonnet-4.5-with-thinking-32k',
] as const;
14+
15+
export async function getAiSdkModelOptionsForAnthropic(
16+
rawModelName: string,
17+
): Promise<ModelOptions | null> {
18+
const modelName = rawModelName as (typeof ANTHROPIC_MODELS)[number];
19+
20+
switch (modelName) {
21+
case 'claude-opus-4.1-no-thinking':
22+
case 'claude-opus-4.1-with-thinking-16k':
23+
case 'claude-opus-4.1-with-thinking-32k':
24+
case 'claude-sonnet-4.5-no-thinking':
25+
case 'claude-sonnet-4.5-with-thinking-16k':
26+
case 'claude-sonnet-4.5-with-thinking-32k': {
27+
const thinkingEnabled = modelName.includes('-with-thinking');
28+
const thinkingBudget = !thinkingEnabled
29+
? undefined
30+
: modelName.endsWith('-32k')
31+
? 32_000
32+
: 16_000;
33+
const isOpus4_1Model = modelName.includes('opus-4.1');
34+
const model = anthropic(isOpus4_1Model ? 'claude-opus-4-1' : 'claude-sonnet-4-5');
35+
return {
36+
model: thinkingEnabled
37+
? wrapLanguageModel({
38+
model,
39+
middleware: anthropicThinkingWithStructuredResponseMiddleware,
40+
})
41+
: model,
42+
providerOptions: {
43+
anthropic: {
44+
sendReasoning: thinkingEnabled,
45+
thinking: {
46+
type: thinkingEnabled ? 'enabled' : 'disabled',
47+
budgetTokens: thinkingBudget,
48+
},
49+
} satisfies AnthropicProviderOptions,
50+
},
51+
};
52+
}
53+
default:
54+
return null;
55+
}
56+
}

runner/codegen/ai-sdk/google.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import {google, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
2+
import {ModelOptions} from './ai-sdk-model-options.js';
3+
4+
/** Model identifiers handled by the Google Gemini AI SDK provider. */
export const GOOGLE_MODELS = [
  'gemini-2.5-flash-lite',
  'gemini-2.5-flash',
  'gemini-2.5-flash-no-thinking',
  'gemini-2.5-flash-with-thinking-16k',
  'gemini-2.5-flash-with-thinking-24k',
  'gemini-2.5-pro',
  'gemini-3-pro-preview',
] as const;
13+
14+
export async function getAiSdkModelOptionsForGoogle(
15+
rawModelName: string,
16+
): Promise<ModelOptions | null> {
17+
const modelName = rawModelName as (typeof GOOGLE_MODELS)[number];
18+
19+
switch (modelName) {
20+
case 'gemini-2.5-flash-lite':
21+
case 'gemini-2.5-flash':
22+
case 'gemini-2.5-pro':
23+
case 'gemini-3-pro-preview':
24+
return {
25+
model: google(modelName),
26+
providerOptions: {
27+
google: {
28+
thinkingConfig: {
29+
includeThoughts: true,
30+
},
31+
} satisfies GoogleGenerativeAIProviderOptions,
32+
},
33+
};
34+
case 'gemini-2.5-flash-no-thinking': {
35+
return {
36+
model: google('gemini-2.5-flash'),
37+
providerOptions: {
38+
google: {
39+
thinkingConfig: {
40+
thinkingBudget: 0,
41+
},
42+
},
43+
},
44+
};
45+
}
46+
case 'gemini-2.5-flash-with-thinking-16k':
47+
case 'gemini-2.5-flash-with-thinking-24k':
48+
let thinkingBudget: number;
49+
if (modelName.endsWith('-16k')) {
50+
thinkingBudget = 16_000;
51+
} else if (modelName.endsWith('-24k')) {
52+
thinkingBudget = 24_000;
53+
} else {
54+
throw new Error(`Unexpected model: ${modelName}`);
55+
}
56+
57+
return {
58+
model: google('gemini-2.5-flash'),
59+
providerOptions: {
60+
google: {
61+
thinkingConfig: {
62+
thinkingBudget: thinkingBudget,
63+
includeThoughts: true,
64+
},
65+
} satisfies GoogleGenerativeAIProviderOptions,
66+
},
67+
};
68+
default:
69+
return null;
70+
}
71+
}

runner/codegen/ai-sdk/openai.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import {openai, OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
2+
import {ModelOptions} from './ai-sdk-model-options.js';
3+
4+
/** Model identifiers handled by the OpenAI AI SDK provider. */
export const OPENAI_MODELS = [
  'gpt-5.1-no-thinking',
  'gpt-5.1-thinking-low',
  'gpt-5.1-thinking-high',
  'gpt-5.1-thinking-medium',
] as const;
10+
11+
export async function getAiSdkModelOptionsForOpenAI(
12+
rawModelName: string,
13+
): Promise<ModelOptions | null> {
14+
const modelName = rawModelName as (typeof OPENAI_MODELS)[number];
15+
16+
switch (modelName) {
17+
case 'gpt-5.1-no-thinking':
18+
case 'gpt-5.1-thinking-low':
19+
case 'gpt-5.1-thinking-medium':
20+
case 'gpt-5.1-thinking-high':
21+
let reasoningEffort: string = 'none';
22+
if (modelName === 'gpt-5.1-thinking-high') {
23+
reasoningEffort = 'high';
24+
} else if (modelName === 'gpt-5.1-thinking-medium') {
25+
reasoningEffort = 'medium';
26+
} else if (modelName === 'gpt-5.1-thinking-low') {
27+
reasoningEffort = 'low';
28+
}
29+
return {
30+
model: openai('gpt-5.1'),
31+
providerOptions: {
32+
openai: {
33+
reasoningEffort,
34+
reasoningSummary: 'detailed',
35+
} satisfies OpenAIResponsesProviderOptions,
36+
},
37+
};
38+
default:
39+
return null;
40+
}
41+
}

runner/codegen/runner-creation.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import type {ClaudeCodeRunner} from './claude-code-runner.js';
44
import type {GenkitRunner} from './genkit/genkit-runner.js';
55
import type {CodexRunner} from './codex-runner.js';
66
import type {NoopUnimplementedRunner} from './noop-unimplemented-runner.js';
7-
import {AiSDKRunner} from './ai-sdk-runner.js';
7+
import {AiSDKRunner} from './ai-sdk/ai-sdk-runner.js';
88

99
interface AvailableRunners {
1010
genkit: GenkitRunner;
@@ -30,7 +30,9 @@ export async function getRunnerByName<T extends RunnerName>(name: T): Promise<Av
3030
m => new m.GenkitRunner() as AvailableRunners[T],
3131
);
3232
case 'ai-sdk':
33-
return import('./ai-sdk-runner.js').then(m => new m.AiSDKRunner() as AvailableRunners[T]);
33+
return import('./ai-sdk/ai-sdk-runner.js').then(
34+
m => new m.AiSDKRunner() as AvailableRunners[T],
35+
);
3436
case 'gemini-cli':
3537
return import('./gemini-cli-runner.js').then(
3638
m => new m.GeminiCliRunner() as AvailableRunners[T],

0 commit comments

Comments
 (0)