maximhq
diff --git a/core/providers/anthropic.go b/core/providers/anthropic.go
Lines changed: 12 additions & 51 deletions
diff --git a/core/providers/azure.go b/core/providers/azure.go
Lines changed: 10 additions & 0 deletions
diff --git a/core/providers/cohere.go b/core/providers/cohere.go
Lines changed: 7 additions & 22 deletions
diff --git a/core/providers/openai.go b/core/providers/openai.go
Lines changed: 10 additions & 0 deletions
diff --git a/core/providers/vertex.go b/core/providers/vertex.go
Lines changed: 9 additions & 9 deletions
diff --git a/core/schemas/pool.go b/core/schemas/pool.go
Lines changed: 1 addition & 0 deletions
@@ -11,7 +11,6 @@ import (
 	"io"
 	"net/http"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/bytedance/sonic"
@@ -31,48 +30,6 @@ type AnthropicProvider struct {
 	customProviderConfig *schemas.CustomProviderConfig // Custom provider config
 }
 
-// anthropicChatResponsePool provides a pool for Anthropic chat response objects.
-var anthropicChatResponsePool = sync.Pool{
-	New: func() interface{} {
-		return &anthropic.AnthropicMessageResponse{}
-	},
-}
-
-// anthropicTextResponsePool provides a pool for Anthropic text response objects.
-var anthropicTextResponsePool = sync.Pool{
-	New: func() interface{} {
-		return &anthropic.AnthropicTextResponse{}
-	},
-}
-
-// acquireAnthropicChatResponse gets an Anthropic chat response from the pool and resets it.
-func acquireAnthropicChatResponse() *anthropic.AnthropicMessageResponse {
-	resp := anthropicChatResponsePool.Get().(*anthropic.AnthropicMessageResponse)
-	*resp = anthropic.AnthropicMessageResponse{} // Reset the struct
-	return resp
-}
-
-// releaseAnthropicChatResponse returns an Anthropic chat response to the pool.
-func releaseAnthropicChatResponse(resp *anthropic.AnthropicMessageResponse) {
-	if resp != nil {
-		anthropicChatResponsePool.Put(resp)
-	}
-}
-
-// acquireAnthropicTextResponse gets an Anthropic text response from the pool and resets it.
-func acquireAnthropicTextResponse() *anthropic.AnthropicTextResponse {
-	resp := anthropicTextResponsePool.Get().(*anthropic.AnthropicTextResponse)
-	*resp = anthropic.AnthropicTextResponse{} // Reset the struct
-	return resp
-}
-
-// releaseAnthropicTextResponse returns an Anthropic text response to the pool.
-func releaseAnthropicTextResponse(resp *anthropic.AnthropicTextResponse) {
-	if resp != nil {
-		anthropicTextResponsePool.Put(resp)
-	}
-}
-
 // NewAnthropicProvider creates a new Anthropic provider instance.
 // It initializes the HTTP client with the provided configuration and sets up response pools.
 // The client is configured with timeouts, concurrency limits, and optional proxy settings.
@@ -92,8 +49,8 @@ func NewAnthropicProvider(config *schemas.ProviderConfig, logger schemas.Logger)
 
 	// Pre-warm response pools
 	for i := 0; i < config.ConcurrencyAndBufferSize.Concurrency; i++ {
-		anthropicTextResponsePool.Put(&anthropic.AnthropicTextResponse{})
-		anthropicChatResponsePool.Put(&anthropic.AnthropicMessageResponse{})
+		anthropic.ReleaseTextResponse(&anthropic.AnthropicTextResponse{})
+		anthropic.ReleaseChatResponse(&anthropic.AnthropicMessageResponse{})
 	}
 
 	// Configure proxy if provided
@@ -186,6 +143,7 @@ func (provider *AnthropicProvider) TextCompletion(ctx context.Context, key schem
 	if reqBody == nil {
 		return nil, newBifrostOperationError("text completion input is not provided", nil, provider.GetProviderKey())
 	}
+	defer anthropic.ReleaseTextRequest(reqBody)
 
 	// Use struct directly for JSON marshaling
 	responseBody, latency, err := provider.completeRequest(ctx, reqBody, provider.networkConfig.BaseURL+"/v1/complete", key.Value)
@@ -194,8 +152,8 @@ func (provider *AnthropicProvider) TextCompletion(ctx context.Context, key schem
 	}
 
 	// Create response object from pool
-	response := acquireAnthropicTextResponse()
-	defer releaseAnthropicTextResponse(response)
+	response := anthropic.AcquireTextResponse()
+	defer anthropic.ReleaseTextResponse(response)
 
 	rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse)
 	if bifrostErr != nil {
@@ -238,6 +196,7 @@ func (provider *AnthropicProvider) ChatCompletion(ctx context.Context, key schem
 	if reqBody == nil {
 		return nil, newBifrostOperationError("chat completion input is not provided", nil, provider.GetProviderKey())
 	}
+	defer anthropic.ReleaseChatRequest(reqBody)
 
 	// Use struct directly for JSON marshaling
 	responseBody, latency, err := provider.completeRequest(ctx, reqBody, provider.networkConfig.BaseURL+"/v1/messages", key.Value)
@@ -246,8 +205,8 @@ func (provider *AnthropicProvider) ChatCompletion(ctx context.Context, key schem
 	}
 
 	// Create response object from pool
-	response := acquireAnthropicChatResponse()
-	defer releaseAnthropicChatResponse(response)
+	response := anthropic.AcquireChatResponse()
+	defer anthropic.ReleaseChatResponse(response)
 
 	rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse)
 	if bifrostErr != nil {
@@ -284,6 +243,7 @@ func (provider *AnthropicProvider) Responses(ctx context.Context, key schemas.Ke
 	if reqBody == nil {
 		return nil, newBifrostOperationError("responses input is not provided", nil, provider.GetProviderKey())
 	}
+	defer anthropic.ReleaseChatRequest(reqBody) // ToAnthropicResponsesRequest returns *AnthropicMessageRequest
 
 	// Use struct directly for JSON marshaling
 	responseBody, latency, err := provider.completeRequest(ctx, reqBody, provider.networkConfig.BaseURL+"/v1/messages", key.Value)
@@ -292,8 +252,8 @@ func (provider *AnthropicProvider) Responses(ctx context.Context, key schemas.Ke
 	}
 
 	// Create response object from pool
-	response := acquireAnthropicChatResponse()
-	defer releaseAnthropicChatResponse(response)
+	response := anthropic.AcquireChatResponse()
+	defer anthropic.ReleaseChatResponse(response)
 
 	rawResponse, bifrostErr := handleProviderResponse(responseBody, response, provider.sendBackRawResponse)
 	if bifrostErr != nil {
@@ -335,6 +295,7 @@ func (provider *AnthropicProvider) ChatCompletionStream(ctx context.Context, pos
 	if reqBody == nil {
 		return nil, newBifrostOperationError("failed to convert request", fmt.Errorf("conversion returned nil"), provider.GetProviderKey())
 	}
+	defer anthropic.ReleaseChatRequest(reqBody)
 	reqBody.Stream = schemas.Ptr(true)
 
 	// Prepare Anthropic headers
 
@@ -43,6 +43,13 @@ func NewAzureProvider(config *schemas.ProviderConfig, logger schemas.Logger) (*A
 		Timeout: time.Second * time.Duration(config.NetworkConfig.DefaultRequestTimeoutInSeconds),
 	}
 
+	// Pre-warm OpenAI pools since Azure uses OpenAI schema objects
+	for i := 0; i < config.ConcurrencyAndBufferSize.Concurrency; i++ {
+		openai.ReleaseTextRequest(&openai.OpenAITextCompletionRequest{})
+		openai.ReleaseChatRequest(&openai.OpenAIChatRequest{})
+		openai.ReleaseEmbeddingRequest(&openai.OpenAIEmbeddingRequest{})
+	}
+
 	// Configure proxy if provided
 	client = configureProxy(client, config.ProxyConfig, logger)
 
@@ -151,6 +158,7 @@ func (provider *AzureProvider) TextCompletion(ctx context.Context, key schemas.K
 	if reqBody == nil {
 		return nil, newBifrostOperationError("text completion input is not provided", nil, schemas.Azure)
 	}
+	defer openai.ReleaseTextRequest(reqBody)
 
 	responseBody, latency, err := provider.completeRequest(ctx, reqBody, "completions", key, request.Model)
 	if err != nil {
@@ -241,6 +249,7 @@ func (provider *AzureProvider) ChatCompletion(ctx context.Context, key schemas.K
 	if reqBody == nil {
 		return nil, newBifrostOperationError("chat completion input is not provided", nil, schemas.Azure)
 	}
+	defer openai.ReleaseChatRequest(reqBody)
 
 	responseBody, latency, err := provider.completeRequest(ctx, reqBody, "chat/completions", key, request.Model)
 	if err != nil {
@@ -297,6 +306,7 @@ func (provider *AzureProvider) Embedding(ctx context.Context, key schemas.Key, r
 	if reqBody == nil {
 		return nil, newBifrostOperationError("embedding input is not provided", nil, schemas.Azure)
 	}
+	defer openai.ReleaseEmbeddingRequest(reqBody)
 
 	responseBody, latency, err := provider.completeRequest(ctx, reqBody, "embeddings", key, request.Model)
 	if err != nil {
 
@@ -10,7 +10,6 @@ import (
 	"fmt"
 	"io"
 	"strings"
-	"sync"
 	"time"
 
 	"net/http"
@@ -21,26 +20,7 @@ import (
 	"github.com/valyala/fasthttp"
 )
 
-// cohereResponsePool provides a pool for Cohere v2 response objects.
-var cohereResponsePool = sync.Pool{
-	New: func() interface{} {
-		return &cohere.CohereChatResponse{}
-	},
-}
-
-// acquireCohereResponse gets a Cohere v2 response from the pool and resets it.
-func acquireCohereResponse() *cohere.CohereChatResponse {
-	resp := cohereResponsePool.Get().(*cohere.CohereChatResponse)
-	*resp = cohere.CohereChatResponse{} // Reset the struct
-	return resp
-}
-
-// releaseCohereResponse returns a Cohere v2 response to the pool.
-func releaseCohereResponse(resp *cohere.CohereChatResponse) {
-	if resp != nil {
-		cohereResponsePool.Put(resp)
-	}
-}
+// Cohere request objects are pooled by the cohere schema package (see cohere.ReleaseChatRequest); no provider-local pool is kept here.
 
 // CohereProvider implements the Provider interface for Cohere.
 type CohereProvider struct {
@@ -71,7 +51,8 @@ func NewCohereProvider(config *schemas.ProviderConfig, logger schemas.Logger) *C
 
 	// Pre-warm response pools
 	for i := 0; i < config.ConcurrencyAndBufferSize.Concurrency; i++ {
-		cohereResponsePool.Put(&cohere.CohereChatResponse{})
+		cohere.ReleaseChatRequest(&cohere.CohereChatRequest{})
+		cohere.ReleaseEmbeddingRequest(&cohere.CohereEmbeddingRequest{})
 	}
 
 	// Set default BaseURL if not provided
@@ -124,6 +105,7 @@ func (provider *CohereProvider) ChatCompletion(ctx context.Context, key schemas.
 	if reqBody == nil {
 		return nil, newBifrostOperationError("chat completion input is not provided", nil, providerName)
 	}
+	defer cohere.ReleaseChatRequest(reqBody)
 
 	cohereResponse, rawResponse, latency, err := provider.handleCohereChatCompletionRequest(ctx, reqBody, key)
 	if err != nil {
@@ -236,6 +218,7 @@ func (provider *CohereProvider) Responses(ctx context.Context, key schemas.Key,
 	if reqBody == nil {
 		return nil, newBifrostOperationError("responses input is not provided", nil, providerName)
 	}
+	defer cohere.ReleaseChatRequest(reqBody) // ToCohereResponsesRequest returns *CohereChatRequest
 
 	cohereResponse, rawResponse, latency, err := provider.handleCohereChatCompletionRequest(ctx, reqBody, key)
 	if err != nil {
@@ -273,6 +256,7 @@ func (provider *CohereProvider) Embedding(ctx context.Context, key schemas.Key,
 	if reqBody == nil {
 		return nil, newBifrostOperationError("embedding input is not provided", nil, providerName)
 	}
+	defer cohere.ReleaseEmbeddingRequest(reqBody)
 
 	// Marshal request body
 	jsonBody, err := sonic.Marshal(reqBody)
@@ -357,6 +341,7 @@ func (provider *CohereProvider) ChatCompletionStream(ctx context.Context, postHo
 	if reqBody == nil {
 		return nil, newBifrostOperationError("chat completion input is not provided", nil, providerName)
 	}
+	defer cohere.ReleaseChatRequest(reqBody)
 	reqBody.Stream = schemas.Ptr(true)
 
 	jsonBody, err := sonic.Marshal(reqBody)
 
@@ -111,6 +111,7 @@ func handleOpenAITextCompletionRequest(
 	if reqBody == nil {
 		return nil, newBifrostOperationError("text completion input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseTextRequest(reqBody)
 	// Create request
 	req := fasthttp.AcquireRequest()
 	resp := fasthttp.AcquireResponse()
@@ -207,6 +208,7 @@ func handleOpenAITextCompletionStreaming(
 	if reqBody == nil {
 		return nil, newBifrostOperationError("text completion input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseTextRequest(reqBody)
 	reqBody.Stream = schemas.Ptr(true)
 	reqBody.StreamOptions = &schemas.ChatStreamOptions{
 		IncludeUsage: schemas.Ptr(true),
@@ -437,6 +439,7 @@ func handleOpenAIChatCompletionRequest(
 	if reqBody == nil {
 		return nil, newBifrostOperationError("chat completion input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseChatRequest(reqBody)
 
 	jsonBody, err := sonic.Marshal(reqBody)
 	if err != nil {
@@ -532,6 +535,7 @@ func handleOpenAIResponsesRequest(
 	if reqBody == nil {
 		return nil, newBifrostOperationError("responses input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseResponsesRequest(reqBody)
 
 	jsonBody, err := sonic.Marshal(reqBody)
 	if err != nil {
@@ -633,6 +637,7 @@ func handleOpenAIEmbeddingRequest(
 	if reqBody == nil {
 		return nil, newBifrostOperationError("embedding input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseEmbeddingRequest(reqBody)
 
 	jsonBody, err := sonic.Marshal(reqBody)
 	if err != nil {
@@ -730,6 +735,7 @@ func handleOpenAIStreaming(
 	if reqBody == nil {
 		return nil, newBifrostOperationError("chat completion input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseChatRequest(reqBody)
 	reqBody.Stream = schemas.Ptr(true)
 	reqBody.StreamOptions = &schemas.ChatStreamOptions{
 		IncludeUsage: schemas.Ptr(true),
@@ -950,6 +956,7 @@ func (provider *OpenAIProvider) Speech(ctx context.Context, key schemas.Key, req
 	if reqBody == nil {
 		return nil, newBifrostOperationError("speech input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseSpeechRequest(reqBody)
 
 	jsonBody, err := sonic.Marshal(reqBody)
 	if err != nil {
@@ -1022,6 +1029,7 @@ func (provider *OpenAIProvider) SpeechStream(ctx context.Context, postHookRunner
 	if reqBody == nil {
 		return nil, newBifrostOperationError("speech input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseSpeechRequest(reqBody)
 	reqBody.StreamFormat = schemas.Ptr("sse")
 
 	jsonBody, err := sonic.Marshal(reqBody)
@@ -1202,6 +1210,7 @@ func (provider *OpenAIProvider) Transcription(ctx context.Context, key schemas.K
 	if reqBody == nil {
 		return nil, newBifrostOperationError("transcription input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseTranscriptionRequest(reqBody)
 
 	// Create multipart form
 	var body bytes.Buffer
@@ -1289,6 +1298,7 @@ func (provider *OpenAIProvider) TranscriptionStream(ctx context.Context, postHoo
 	if reqBody == nil {
 		return nil, newBifrostOperationError("transcription input is not provided", nil, providerName)
 	}
+	defer openai.ReleaseTranscriptionRequest(reqBody)
 	reqBody.Stream = schemas.Ptr(true)
 
 	// Create multipart form
 
@@ -70,9 +70,8 @@ func NewVertexProvider(config *schemas.ProviderConfig, logger schemas.Logger) (*
 
 	// Pre-warm response pools
 	for range config.ConcurrencyAndBufferSize.Concurrency {
-		// openAIResponsePool.Put(&schemas.BifrostResponse{})
-		anthropicChatResponsePool.Put(&anthropic.AnthropicMessageResponse{})
-
+		vertex.ReleaseEmbeddingRequest(&vertex.VertexEmbeddingRequest{})
+		vertex.ReleaseEmbeddingResponse(&vertex.VertexEmbeddingResponse{})
 	}
 
 	return &VertexProvider{
@@ -222,7 +221,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
 				},
 			}
 		}
-		if errors.Is(err, fasthttp.ErrTimeout) ||  errors.Is(err, context.DeadlineExceeded) {
+		if errors.Is(err, fasthttp.ErrTimeout) || errors.Is(err, context.DeadlineExceeded) {
 			return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)
 		}
 		return nil, &schemas.BifrostError{
@@ -259,7 +258,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
 				},
 			}
 		}
-		if errors.Is(err, fasthttp.ErrTimeout) ||  errors.Is(err, context.DeadlineExceeded) {
+		if errors.Is(err, fasthttp.ErrTimeout) || errors.Is(err, context.DeadlineExceeded) {
 			return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)
 		}
 		// Remove client from pool for non-context errors (could be auth/network issues)
@@ -300,8 +299,8 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
 
 	if strings.Contains(request.Model, "claude") {
 		// Create response object from pool
-		response := acquireAnthropicChatResponse()
-		defer releaseAnthropicChatResponse(response)
+		response := anthropic.AcquireChatResponse()
+		defer anthropic.ReleaseChatResponse(response)
 
 		rawResponse, bifrostErr := handleProviderResponse(body, response, provider.sendBackRawResponse)
 		if bifrostErr != nil {
@@ -383,6 +382,7 @@ func (provider *VertexProvider) Embedding(ctx context.Context, key schemas.Key,
 	if reqBody == nil {
 		return nil, newConfigurationError("embedding input texts are empty", schemas.Vertex)
 	}
+	defer vertex.ReleaseEmbeddingRequest(reqBody)
 
 	// All Vertex AI embedding models use the same native Vertex embedding API
 	return provider.handleVertexEmbedding(ctx, request.Model, key, reqBody, request.Params)
@@ -414,7 +414,7 @@ func (provider *VertexProvider) handleVertexEmbedding(ctx context.Context, model
 				},
 			}
 		}
-		if errors.Is(err, fasthttp.ErrTimeout) ||  errors.Is(err, context.DeadlineExceeded) {
+		if errors.Is(err, fasthttp.ErrTimeout) || errors.Is(err, context.DeadlineExceeded) {
 			return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)
 		}
 		return nil, newBifrostOperationError(schemas.ErrProviderRequest, err, schemas.Vertex)
@@ -445,7 +445,7 @@ func (provider *VertexProvider) handleVertexEmbedding(ctx context.Context, model
 				},
 			}
 		}
-		if errors.Is(err, fasthttp.ErrTimeout) ||  errors.Is(err, context.DeadlineExceeded) {
+		if errors.Is(err, fasthttp.ErrTimeout) || errors.Is(err, context.DeadlineExceeded) {
 			return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)
 		}
 		// Remove client from pool for non-context errors (could be auth/network issues)
 
@@ -0,0 +1 @@
+package schemas
Original file line number	Diff line number	Diff line change
`@@ -70,9 +70,8 @@ func NewVertexProvider(config *schemas.ProviderConfig, logger schemas.Logger) (*`
`70`	`70`
`71`	`71`	`// Pre-warm response pools`
`72`	`72`	`for range config.ConcurrencyAndBufferSize.Concurrency {`
`73`		`- // openAIResponsePool.Put(&schemas.BifrostResponse{})`
`74`		`- anthropicChatResponsePool.Put(&anthropic.AnthropicMessageResponse{})`
`75`		`-`
	`73`	`+ vertex.ReleaseEmbeddingRequest(&vertex.VertexEmbeddingRequest{})`
	`74`	`+ vertex.ReleaseEmbeddingResponse(&vertex.VertexEmbeddingResponse{})`
`76`	`75`	`}`
`77`	`76`
`78`	`77`	`return &VertexProvider{`
`@@ -222,7 +221,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.`
`222`	`221`	`},`
`223`	`222`	`}`
`224`	`223`	`}`
`225`		`- if errors.Is(err, fasthttp.ErrTimeout) \|\|  errors.Is(err, context.DeadlineExceeded) {`
	`224`	`+ if errors.Is(err, fasthttp.ErrTimeout) \|\| errors.Is(err, context.DeadlineExceeded) {`
`226`	`225`	`return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)`
`227`	`226`	`}`
`228`	`227`	`return nil, &schemas.BifrostError{`
`@@ -259,7 +258,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.`
`259`	`258`	`},`
`260`	`259`	`}`
`261`	`260`	`}`
`262`		`- if errors.Is(err, fasthttp.ErrTimeout) \|\|  errors.Is(err, context.DeadlineExceeded) {`
	`261`	`+ if errors.Is(err, fasthttp.ErrTimeout) \|\| errors.Is(err, context.DeadlineExceeded) {`
`263`	`262`	`return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)`
`264`	`263`	`}`
`265`	`264`	`// Remove client from pool for non-context errors (could be auth/network issues)`
`@@ -300,8 +299,8 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.`
`300`	`299`
`301`	`300`	`if strings.Contains(request.Model, "claude") {`
`302`	`301`	`// Create response object from pool`
`303`		`- response := acquireAnthropicChatResponse()`
`304`		`- defer releaseAnthropicChatResponse(response)`
	`302`	`+ response := anthropic.AcquireChatResponse()`
	`303`	`+ defer anthropic.ReleaseChatResponse(response)`
`305`	`304`
`306`	`305`	`rawResponse, bifrostErr := handleProviderResponse(body, response, provider.sendBackRawResponse)`
`307`	`306`	`if bifrostErr != nil {`
`@@ -383,6 +382,7 @@ func (provider *VertexProvider) Embedding(ctx context.Context, key schemas.Key,`
`383`	`382`	`if reqBody == nil {`
`384`	`383`	`return nil, newConfigurationError("embedding input texts are empty", schemas.Vertex)`
`385`	`384`	`}`
	`385`	`+ defer vertex.ReleaseEmbeddingRequest(reqBody)`
`386`	`386`
`387`	`387`	`// All Vertex AI embedding models use the same native Vertex embedding API`
`388`	`388`	`return provider.handleVertexEmbedding(ctx, request.Model, key, reqBody, request.Params)`
`@@ -414,7 +414,7 @@ func (provider *VertexProvider) handleVertexEmbedding(ctx context.Context, model`
`414`	`414`	`},`
`415`	`415`	`}`
`416`	`416`	`}`
`417`		`- if errors.Is(err, fasthttp.ErrTimeout) \|\|  errors.Is(err, context.DeadlineExceeded) {`
	`417`	`+ if errors.Is(err, fasthttp.ErrTimeout) \|\| errors.Is(err, context.DeadlineExceeded) {`
`418`	`418`	`return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)`
`419`	`419`	`}`
`420`	`420`	`return nil, newBifrostOperationError(schemas.ErrProviderRequest, err, schemas.Vertex)`
`@@ -445,7 +445,7 @@ func (provider *VertexProvider) handleVertexEmbedding(ctx context.Context, model`
`445`	`445`	`},`
`446`	`446`	`}`
`447`	`447`	`}`
`448`		`- if errors.Is(err, fasthttp.ErrTimeout) \|\|  errors.Is(err, context.DeadlineExceeded) {`
	`448`	`+ if errors.Is(err, fasthttp.ErrTimeout) \|\| errors.Is(err, context.DeadlineExceeded) {`
`449`	`449`	`return nil, newBifrostOperationError(schemas.ErrProviderRequestTimedOut, err, schemas.Vertex)`
`450`	`450`	`}`
`451`	`451`	`// Remove client from pool for non-context errors (could be auth/network issues)`