diff --git a/core/bifrost.go b/core/bifrost.go index a89765c82..253943a65 100644 --- a/core/bifrost.go +++ b/core/bifrost.go @@ -825,6 +825,7 @@ func (bifrost *Bifrost) ReloadPlugin(plugin schemas.Plugin) error { } } +// GetConfiguredProviders returns a configured providers list. func (bifrost *Bifrost) GetConfiguredProviders() ([]schemas.ModelProvider, error) { providers := bifrost.providers.Load() if providers == nil { diff --git a/core/schemas/models.go b/core/schemas/models.go index 5b50077aa..3f251e0fb 100644 --- a/core/schemas/models.go +++ b/core/schemas/models.go @@ -44,6 +44,68 @@ type BifrostListModelsResponse struct { HasMore *bool `json:"-"` } +// ApplyPagination applies offset-based pagination to a BifrostListModelsResponse. +// Uses opaque tokens with LastID validation to ensure cursor integrity. +// Returns the paginated response with properly set NextPageToken. +func (response *BifrostListModelsResponse) ApplyPagination(pageSize int, pageToken string) *BifrostListModelsResponse { + if response == nil { + return nil + } + + totalItems := len(response.Data) + + if pageSize <= 0 { + return response + } + + cursor := decodePaginationCursor(pageToken) + offset := cursor.Offset + + // Validate cursor integrity if LastID is present + if cursor.LastID != "" && !validatePaginationCursor(cursor, response.Data) { + // Invalid cursor: reset to beginning + offset = 0 + } + + if offset >= totalItems { + // Return empty page, no next token + return &BifrostListModelsResponse{ + Data: []Model{}, + ExtraFields: response.ExtraFields, + NextPageToken: "", + } + } + + endIndex := offset + pageSize + if endIndex > totalItems { + endIndex = totalItems + } + + paginatedData := response.Data[offset:endIndex] + + paginatedResponse := &BifrostListModelsResponse{ + Data: paginatedData, + ExtraFields: response.ExtraFields, + } + + if endIndex < totalItems { + // Get the last item ID for cursor validation + var lastID string + if len(paginatedData) > 0 { + lastID = 
paginatedData[len(paginatedData)-1].ID + } + + nextToken, err := encodePaginationCursor(endIndex, lastID) + if err == nil { + paginatedResponse.NextPageToken = nextToken + } + } else { + paginatedResponse.NextPageToken = "" + } + + return paginatedResponse +} + type Model struct { ID string `json:"id"` CanonicalSlug *string `json:"canonical_slug,omitempty"` @@ -172,65 +234,3 @@ func validatePaginationCursor(cursor paginationCursor, data []Model) bool { return true } - -// ApplyPagination applies offset-based pagination to a BifrostListModelsResponse. -// Uses opaque tokens with LastID validation to ensure cursor integrity. -// Returns the paginated response with properly set NextPageToken. -func (response *BifrostListModelsResponse) ApplyPagination(pageSize int, pageToken string) *BifrostListModelsResponse { - if response == nil { - return nil - } - - totalItems := len(response.Data) - - if pageSize <= 0 { - return response - } - - cursor := decodePaginationCursor(pageToken) - offset := cursor.Offset - - // Validate cursor integrity if LastID is present - if cursor.LastID != "" && !validatePaginationCursor(cursor, response.Data) { - // Invalid cursor: reset to beginning - offset = 0 - } - - if offset >= totalItems { - // Return empty page, no next token - return &BifrostListModelsResponse{ - Data: []Model{}, - ExtraFields: response.ExtraFields, - NextPageToken: "", - } - } - - endIndex := offset + pageSize - if endIndex > totalItems { - endIndex = totalItems - } - - paginatedData := response.Data[offset:endIndex] - - paginatedResponse := &BifrostListModelsResponse{ - Data: paginatedData, - ExtraFields: response.ExtraFields, - } - - if endIndex < totalItems { - // Get the last item ID for cursor validation - var lastID string - if len(paginatedData) > 0 { - lastID = paginatedData[len(paginatedData)-1].ID - } - - nextToken, err := encodePaginationCursor(endIndex, lastID) - if err == nil { - paginatedResponse.NextPageToken = nextToken - } - } else { - 
paginatedResponse.NextPageToken = "" - } - - return paginatedResponse -} diff --git a/docs/quickstart/gateway/cli-agents.mdx b/docs/quickstart/gateway/cli-agents.mdx index a39fa132f..11f0076e1 100644 --- a/docs/quickstart/gateway/cli-agents.mdx +++ b/docs/quickstart/gateway/cli-agents.mdx @@ -246,6 +246,16 @@ For complete monitoring capabilities, see [Built-in Observability](../../feature Bifrost automatically sends all configured MCP tools to your agents. This means your agents can access filesystem operations, database queries, web search, and more without any additional configuration. + +**Important: MCP Tool Execution Behavior** + +When using Bifrost as a gateway, MCP tool calls require manual approval and execution for security reasons. Bifrost returns the tool call information but doesn't automatically execute it. You need to handle the approval and execution logic by calling the `v1/mcp/tool/execute` endpoint. + +**Gateway-on-Gateway Limitations**: If your agent/editor (like Zed) has its own gateway that routes through Bifrost, the agent's gateway may not handle MCP tool approvals that come from Bifrost. In such cases, we recommend configuring MCP tools directly in your agent/editor instead of relying on Bifrost's MCP integration. + +We intentionally avoid supporting "gateway-on-gateway" MCP setups because handling tool approvals across multiple gateways introduces unnecessary complexity and falls outside the scope of what an LLM gateway should manage. While we're working on an agentic mode that will allow Bifrost to automatically execute certain tool calls, the current design prioritizes security and clear responsibility boundaries. + + For setup and available tools, see [MCP Integration](../../features/mcp). 
## Next Steps diff --git a/framework/changelog.md b/framework/changelog.md index 5634baade..48b7f16d2 100644 --- a/framework/changelog.md +++ b/framework/changelog.md @@ -1 +1,2 @@ - chore: update core version to 1.2.22 +- feat: expose method to get pricing data for a model in model catalog \ No newline at end of file diff --git a/framework/modelcatalog/main.go b/framework/modelcatalog/main.go index 80e28e913..010e4182e 100644 --- a/framework/modelcatalog/main.go +++ b/framework/modelcatalog/main.go @@ -51,9 +51,6 @@ type ModelCatalog struct { syncCancel context.CancelFunc } -// PricingData represents the structure of the pricing.json file -type PricingData map[string]PricingEntry - // PricingEntry represents a single model's pricing information type PricingEntry struct { // Basic pricing @@ -61,16 +58,13 @@ type PricingEntry struct { OutputCostPerToken float64 `json:"output_cost_per_token"` Provider string `json:"provider"` Mode string `json:"mode"` - // Additional pricing for media InputCostPerImage *float64 `json:"input_cost_per_image,omitempty"` InputCostPerVideoPerSecond *float64 `json:"input_cost_per_video_per_second,omitempty"` InputCostPerAudioPerSecond *float64 `json:"input_cost_per_audio_per_second,omitempty"` - // Character-based pricing InputCostPerCharacter *float64 `json:"input_cost_per_character,omitempty"` OutputCostPerCharacter *float64 `json:"output_cost_per_character,omitempty"` - // Pricing above 128k tokens InputCostPerTokenAbove128kTokens *float64 `json:"input_cost_per_token_above_128k_tokens,omitempty"` InputCostPerCharacterAbove128kTokens *float64 `json:"input_cost_per_character_above_128k_tokens,omitempty"` @@ -79,7 +73,6 @@ type PricingEntry struct { InputCostPerAudioPerSecondAbove128kTokens *float64 `json:"input_cost_per_audio_per_second_above_128k_tokens,omitempty"` OutputCostPerTokenAbove128kTokens *float64 `json:"output_cost_per_token_above_128k_tokens,omitempty"` OutputCostPerCharacterAbove128kTokens *float64 
`json:"output_cost_per_character_above_128k_tokens,omitempty"` - // Cache and batch pricing CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost,omitempty"` InputCostPerTokenBatches *float64 `json:"input_cost_per_token_batches,omitempty"` @@ -189,6 +182,28 @@ func (mc *ModelCatalog) getPricingSyncInterval() time.Duration { return mc.pricingSyncInterval } +// GetPricingEntryForModel returns the pricing entry for the given model and provider, or nil if no entry is found for any request mode +func (mc *ModelCatalog) GetPricingEntryForModel(model string, provider schemas.ModelProvider) *PricingEntry { + mc.mu.RLock() + defer mc.mu.RUnlock() + // Check all modes + for _, mode := range []schemas.RequestType{ + schemas.TextCompletionRequest, + schemas.ChatCompletionRequest, + schemas.ResponsesRequest, + schemas.EmbeddingRequest, + schemas.SpeechRequest, + schemas.TranscriptionRequest, + } { + key := makeKey(model, string(provider), normalizeRequestType(mode)) + pricing, ok := mc.pricingData[key] + if ok { + return convertTableModelPricingToPricingData(&pricing) + } + } + return nil +} + // GetModelsForProvider returns all available models for a given provider (thread-safe) func (mc *ModelCatalog) GetModelsForProvider(provider schemas.ModelProvider) []string { mc.mu.RLock() diff --git a/framework/modelcatalog/sync.go b/framework/modelcatalog/sync.go index 6a909a164..59a717f8f 100644 --- a/framework/modelcatalog/sync.go +++ b/framework/modelcatalog/sync.go @@ -132,7 +132,7 @@ func (mc *ModelCatalog) syncPricing(ctx context.Context) error { } // loadPricingFromURL loads pricing data from the remote URL -func (mc *ModelCatalog) loadPricingFromURL(ctx context.Context) (PricingData, error) { +func (mc *ModelCatalog) loadPricingFromURL(ctx context.Context) (map[string]PricingEntry, error) { // Create HTTP client with timeout client := &http.Client{ Timeout: 30 * time.Second, @@ -160,7 +160,7 @@ func (mc *ModelCatalog) loadPricingFromURL(ctx context.Context) (PricingData, er } // Unmarshal JSON data - var pricingData PricingData + var pricingData 
map[string]PricingEntry if err := json.Unmarshal(data, &pricingData); err != nil { return nil, fmt.Errorf("failed to unmarshal pricing data: %w", err) } diff --git a/framework/modelcatalog/utils.go b/framework/modelcatalog/utils.go index 2ff8e3a42..399913662 100644 --- a/framework/modelcatalog/utils.go +++ b/framework/modelcatalog/utils.go @@ -119,6 +119,31 @@ func convertPricingDataToTableModelPricing(modelKey string, entry PricingEntry) return pricing } +// convertTableModelPricingToPricingData converts a TableModelPricing struct to a PricingEntry struct +func convertTableModelPricingToPricingData(pricing *configstoreTables.TableModelPricing) *PricingEntry { + return &PricingEntry{ + Provider: pricing.Provider, + Mode: pricing.Mode, + InputCostPerToken: pricing.InputCostPerToken, + OutputCostPerToken: pricing.OutputCostPerToken, + InputCostPerImage: pricing.InputCostPerImage, + InputCostPerVideoPerSecond: pricing.InputCostPerVideoPerSecond, + InputCostPerAudioPerSecond: pricing.InputCostPerAudioPerSecond, + InputCostPerCharacter: pricing.InputCostPerCharacter, + OutputCostPerCharacter: pricing.OutputCostPerCharacter, + InputCostPerTokenAbove128kTokens: pricing.InputCostPerTokenAbove128kTokens, + InputCostPerCharacterAbove128kTokens: pricing.InputCostPerCharacterAbove128kTokens, + InputCostPerImageAbove128kTokens: pricing.InputCostPerImageAbove128kTokens, + InputCostPerVideoPerSecondAbove128kTokens: pricing.InputCostPerVideoPerSecondAbove128kTokens, + InputCostPerAudioPerSecondAbove128kTokens: pricing.InputCostPerAudioPerSecondAbove128kTokens, + OutputCostPerTokenAbove128kTokens: pricing.OutputCostPerTokenAbove128kTokens, + OutputCostPerCharacterAbove128kTokens: pricing.OutputCostPerCharacterAbove128kTokens, + CacheReadInputTokenCost: pricing.CacheReadInputTokenCost, + InputCostPerTokenBatches: pricing.InputCostPerTokenBatches, + OutputCostPerTokenBatches: pricing.OutputCostPerTokenBatches, + } +} + // getSafeFloat64 returns the value of a float64 
pointer or fallback if nil func getSafeFloat64(ptr *float64, fallback float64) float64 { if ptr != nil { diff --git a/transports/bifrost-http/handlers/inference.go b/transports/bifrost-http/handlers/inference.go index 2d988ca32..c0f4b86f5 100644 --- a/transports/bifrost-http/handlers/inference.go +++ b/transports/bifrost-http/handlers/inference.go @@ -339,6 +339,27 @@ func (h *CompletionHandler) listModels(ctx *fasthttp.RequestCtx) { return } + // Add pricing data to the response + if len(resp.Data) > 0 && h.config.PricingManager != nil { + for i, modelEntry := range resp.Data { + provider, modelName := schemas.ParseModelString(modelEntry.ID, "") + pricingEntry := h.config.PricingManager.GetPricingEntryForModel(modelName, provider) + if pricingEntry != nil { + pricing := &schemas.Pricing{ + Prompt: bifrost.Ptr(fmt.Sprintf("%f", pricingEntry.InputCostPerToken)), + Completion: bifrost.Ptr(fmt.Sprintf("%f", pricingEntry.OutputCostPerToken)), + } + if pricingEntry.InputCostPerImage != nil { + pricing.Image = bifrost.Ptr(fmt.Sprintf("%f", *pricingEntry.InputCostPerImage)) + } + if pricingEntry.CacheReadInputTokenCost != nil { + pricing.InputCacheRead = bifrost.Ptr(fmt.Sprintf("%f", *pricingEntry.CacheReadInputTokenCost)) + } + resp.Data[i].Pricing = pricing + } + } + } + // Send successful response SendJSON(ctx, resp) } @@ -656,7 +677,7 @@ func (h *CompletionHandler) speech(ctx *fasthttp.RequestCtx) { return } - if req.Input == "" { + if req.SpeechInput == nil || req.SpeechInput.Input == "" { SendError(ctx, fasthttp.StatusBadRequest, "Input is required for speech completion") return } diff --git a/transports/bifrost-http/server/server.go b/transports/bifrost-http/server/server.go index 693fa74d6..44446e14e 100644 --- a/transports/bifrost-http/server/server.go +++ b/transports/bifrost-http/server/server.go @@ -816,6 +816,7 @@ func (s *BifrostHTTPServer) Bootstrap(ctx context.Context) error { } else { s.Config.PricingManager.AddModelDataToPool(modelData) } + // Add 
pricing data to the client logger.Info("models added to catalog") s.Config.SetBifrostClient(s.Client) // Initialize routes @@ -930,4 +931,4 @@ func (s *BifrostHTTPServer) Start() error { return err } return nil -} \ No newline at end of file +} diff --git a/transports/changelog.md b/transports/changelog.md index b0dfa3236..972c9bd63 100644 --- a/transports/changelog.md +++ b/transports/changelog.md @@ -1,3 +1,4 @@ - chore: update core version to 1.2.22 and framework version to 1.1.27 - feat: added unified streaming lifecycle events across all providers to fully align with OpenAI’s streaming response types. -- chore: shift from `alpha/responses` to `v1/responses` in openrouter provider for responses API \ No newline at end of file +- chore: shift from `alpha/responses` to `v1/responses` in openrouter provider for responses API +- feat: send back pricing data for models in list models response \ No newline at end of file