Commit 877729f

response mapping init

Signed-off-by: JaredforReal <w13431838023@gmail.com>
1 parent 9cda929 commit 877729f

File tree

8 files changed: +380 −79 lines

config/config.development.yaml

Lines changed: 11 additions & 8 deletions
@@ -60,6 +60,9 @@ categories:

 default_model: test-model

+# Enable OpenAI Responses API adapter (experimental)
+enable_responses_adapter: true
+
 # Auto model name for automatic model selection (optional)
 # Uncomment and set to customize the model name for automatic routing
 # auto_model_name: "MoM"

@@ -75,31 +78,31 @@ observability:
   tracing:
     # Enable tracing for development/debugging
     enabled: true
-
+
     # OpenTelemetry provider
     provider: "opentelemetry"
-
+
     exporter:
       # Stdout exporter prints traces to console (great for debugging)
       type: "stdout"
-
+
       # No endpoint needed for stdout
       # endpoint: ""
       # insecure: true
-
+
     sampling:
       # Always sample in development to see all traces
       type: "always_on"
-
+
      # Rate not used for always_on
      # rate: 1.0
-
+
    resource:
      # Service name for trace identification
      service_name: "vllm-semantic-router-dev"
-
+
      # Version for development
      service_version: "dev"
-
+
      # Environment identifier
      deployment_environment: "development"

(The bare -/+ pairs above are whitespace-only changes to blank lines.)

config/config.yaml

Lines changed: 3 additions & 0 deletions
@@ -245,6 +245,9 @@ reasoning_families:
 # Global default reasoning effort level
 default_reasoning_effort: high

+# Enable OpenAI Responses API adapter (experimental)
+enable_responses_adapter: false
+
 # API Configuration
 api:
   batch_classification:
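Usage sketch (illustrative, not part of this commit): with the flag enabled, a client posts to /v1/responses instead of /v1/chat/completions. The address below is a placeholder for wherever the router listens, and the payload uses only the fields PR1 supports.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Text-only Responses payload; exactly the shape the PR1 adapter accepts
	payload := []byte(`{"model":"test-model","input":"Hello world","max_output_tokens":128}`)

	// Placeholder address: substitute your deployment's listener
	resp, err := http.Post("http://localhost:8801/v1/responses", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(body)) // expect {"object":"response",...} when the adapter is enabled
}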

src/semantic-router/pkg/config/config.go

Lines changed: 4 additions & 0 deletions
@@ -131,6 +131,10 @@ type RouterConfig struct {

	// Gateway route cache clearing
	ClearRouteCache bool `yaml:"clear_route_cache"`
+
+	// EnableResponsesAdapter enables the compatibility shim for the OpenAI Responses API (/v1/responses).
+	// When enabled, POST /v1/responses requests are adapted to legacy /v1/chat/completions.
+	EnableResponsesAdapter bool `yaml:"enable_responses_adapter"`
 }

// APIConfig represents configuration for API endpoints
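The yaml tag means the new flag is picked up wherever RouterConfig is unmarshaled from the config file. A minimal sketch of that round trip, assuming gopkg.in/yaml.v3 and a trimmed-down struct (the project's actual loader may differ):

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Stand-in for the real RouterConfig; only the new field is shown
type RouterConfig struct {
	EnableResponsesAdapter bool `yaml:"enable_responses_adapter"`
}

func main() {
	raw := []byte("enable_responses_adapter: true\n")

	var cfg RouterConfig
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.EnableResponsesAdapter) // true
}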
Lines changed: 146 additions & 0 deletions
@@ -0,0 +1,146 @@
package extproc

import (
	"encoding/json"
	"fmt"
	"strings"
)

// mapResponsesRequestToChatCompletions converts a minimal OpenAI Responses API request
// into a legacy Chat Completions request JSON. Supports only text input for PR1.
func mapResponsesRequestToChatCompletions(original []byte) ([]byte, error) {
	var req map[string]interface{}
	if err := json.Unmarshal(original, &req); err != nil {
		return nil, err
	}

	// Extract model
	model, _ := req["model"].(string)
	if model == "" {
		return nil, fmt.Errorf("missing model")
	}

	// Derive user content
	var userContent string
	if input, ok := req["input"]; ok {
		switch v := input.(type) {
		case string:
			userContent = v
		case []interface{}:
			// Join any string elements; ignore non-string for now
			var parts []string
			for _, it := range v {
				if s, ok := it.(string); ok {
					parts = append(parts, s)
				} else if m, ok := it.(map[string]interface{}); ok {
					// Try common shapes: {type:"input_text"|"text", text:"..."}
					if t, _ := m["type"].(string); t == "input_text" || t == "text" {
						if txt, _ := m["text"].(string); txt != "" {
							parts = append(parts, txt)
						}
					}
				}
			}
			userContent = strings.TrimSpace(strings.Join(parts, " "))
		default:
			// Unsupported multimodal input
			return nil, fmt.Errorf("unsupported input type")
		}
	} else if msgs, ok := req["messages"].([]interface{}); ok {
		// Fallback: if the caller already provided messages, pass them through.
		// This enables easy migration from chat/completions.
		mapped := map[string]interface{}{
			"model":    model,
			"messages": msgs,
		}
		// Map basic params
		if v, ok := req["temperature"]; ok {
			mapped["temperature"] = v
		}
		if v, ok := req["top_p"]; ok {
			mapped["top_p"] = v
		}
		if v, ok := req["max_output_tokens"]; ok {
			mapped["max_tokens"] = v
		}
		return json.Marshal(mapped)
	}

	if userContent == "" {
		return nil, fmt.Errorf("empty input")
	}

	// Build minimal Chat Completions request
	mapped := map[string]interface{}{
		"model": model,
		"messages": []map[string]interface{}{
			{"role": "user", "content": userContent},
		},
	}
	// Map basic params
	if v, ok := req["temperature"]; ok {
		mapped["temperature"] = v
	}
	if v, ok := req["top_p"]; ok {
		mapped["top_p"] = v
	}
	if v, ok := req["max_output_tokens"]; ok {
		mapped["max_tokens"] = v
	}

	return json.Marshal(mapped)
}

// mapChatCompletionToResponses converts an OpenAI ChatCompletion JSON
// into a minimal Responses API JSON (non-streaming only) for PR1.
func mapChatCompletionToResponses(chatCompletionJSON []byte) ([]byte, error) {
	var parsed struct {
		ID      string `json:"id"`
		Object  string `json:"object"`
		Created int64  `json:"created"`
		Model   string `json:"model"`
		Choices []struct {
			Index        int    `json:"index"`
			FinishReason string `json:"finish_reason"`
			Message      struct {
				Role    string `json:"role"`
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
		Usage struct {
			PromptTokens     int `json:"prompt_tokens"`
			CompletionTokens int `json:"completion_tokens"`
			TotalTokens      int `json:"total_tokens"`
		} `json:"usage"`
	}
	if err := json.Unmarshal(chatCompletionJSON, &parsed); err != nil {
		return nil, err
	}

	content := ""
	stopReason := "stop"
	if len(parsed.Choices) > 0 {
		content = parsed.Choices[0].Message.Content
		if parsed.Choices[0].FinishReason != "" {
			stopReason = parsed.Choices[0].FinishReason
		}
	}

	out := map[string]interface{}{
		"id":      parsed.ID,
		"object":  "response",
		"created": parsed.Created,
		"model":   parsed.Model,
		"output": []map[string]interface{}{
			{"type": "message", "role": "assistant", "content": content},
		},
		"stop_reason": stopReason,
		"usage": map[string]int{
			"input_tokens":  parsed.Usage.PromptTokens,
			"output_tokens": parsed.Usage.CompletionTokens,
			"total_tokens":  parsed.Usage.TotalTokens,
		},
	}

	return json.Marshal(out)
}
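The tests in the next file cover the plain string input; a hypothetical extra test (not in this commit) would exercise the structured input-array shape the mapper also accepts, where string elements and {type:"input_text", text:"..."} parts are joined with spaces:

package extproc

import (
	"encoding/json"
	"testing"
)

func TestMapResponsesRequestToChatCompletions_StructuredInput(t *testing.T) {
	// "input" mixes a bare string with an {type:"input_text"} part
	in := []byte(`{"model":"gpt-test","input":["Hello",{"type":"input_text","text":"world"}]}`)
	out, err := mapResponsesRequestToChatCompletions(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	msgs, ok := m["messages"].([]interface{})
	if !ok || len(msgs) != 1 {
		t.Fatalf("unexpected messages: %#v", m["messages"])
	}
	content, _ := msgs[0].(map[string]interface{})["content"].(string)
	if content != "Hello world" {
		t.Fatalf("want %q, got %q", "Hello world", content)
	}
}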
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
package extproc

import (
	"encoding/json"
	"testing"
)

func TestMapResponsesRequestToChatCompletions_TextInput(t *testing.T) {
	in := []byte(`{"model":"gpt-test","input":"Hello world","temperature":0.2,"top_p":0.9,"max_output_tokens":128}`)
	out, err := mapResponsesRequestToChatCompletions(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	if m["model"].(string) != "gpt-test" {
		t.Fatalf("model not mapped")
	}
	if _, ok := m["messages"].([]interface{}); !ok {
		t.Fatalf("messages missing")
	}
}

func TestMapChatCompletionToResponses_Minimal(t *testing.T) {
	in := []byte(`{
		"id":"chatcmpl-1","object":"chat.completion","created":123,"model":"gpt-test",
		"choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"hi"}}],
		"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}
	}`)
	out, err := mapChatCompletionToResponses(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	if m["object"].(string) != "response" {
		t.Fatalf("object not 'response'")
	}
	if m["stop_reason"].(string) == "" {
		t.Fatalf("stop_reason missing")
	}
}

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 56 additions & 2 deletions
@@ -329,6 +329,45 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_RequestHeaders
		return r.handleModelsRequest(path)
	}

+	// Responses adapter: detect POST /v1/responses and gate by feature flag
+	if method == "POST" && strings.HasPrefix(path, "/v1/responses") {
+		if r.Config == nil || !r.Config.EnableResponsesAdapter {
+			observability.Warnf("/v1/responses requested but adapter disabled")
+			return r.createErrorResponse(404, "Responses API not enabled"), nil
+		}
+
+		// Prepare header mutation to rewrite :path to legacy chat completions;
+		// the actual body mapping occurs in handleRequestBody
+		newPath := strings.Replace(path, "/v1/responses", "/v1/chat/completions", 1)
+
+		headerMutation := &ext_proc.HeaderMutation{
+			// Remove content-length because the body will be mutated later
+			RemoveHeaders: []string{"content-length"},
+			SetHeaders: []*core.HeaderValueOption{
+				{
+					Header: &core.HeaderValue{
+						Key:      ":path",
+						RawValue: []byte(newPath),
+					},
+				},
+			},
+		}
+
+		response := &ext_proc.ProcessingResponse{
+			Response: &ext_proc.ProcessingResponse_RequestHeaders{
+				RequestHeaders: &ext_proc.HeadersResponse{
+					Response: &ext_proc.CommonResponse{
+						Status:         ext_proc.CommonResponse_CONTINUE,
+						HeaderMutation: headerMutation,
+					},
+				},
+			},
+		}
+
+		observability.Infof("Rewriting /v1/responses to %s (headers phase)", newPath)
+		return response, nil
+	}
+
	// Prepare base response
	response := &ext_proc.ProcessingResponse{
		Response: &ext_proc.ProcessingResponse_RequestHeaders{

@@ -363,13 +402,28 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBody
		ctx.ExpectStreamingResponse = true // Set this if stream param is found
	}

+	// If the path was /v1/responses and the adapter is enabled, map the request JSON to ChatCompletion
+	if r.Config != nil && r.Config.EnableResponsesAdapter {
+		if p, ok := ctx.Headers[":path"]; ok && strings.HasPrefix(p, "/v1/responses") {
+			mapped, err := mapResponsesRequestToChatCompletions(ctx.OriginalRequestBody)
+			if err != nil {
+				observability.Errorf("Responses→Chat mapping failed: %v", err)
+				metrics.RecordRequestError(ctx.RequestModel, "parse_error")
+				return r.createErrorResponse(400, "Invalid /v1/responses payload"), nil
+			}
+
+			// Replace the original body with the mapped body for downstream processing
+			ctx.OriginalRequestBody = mapped
+
+			// No-op for the Accept header here; downstream content negotiation remains unchanged
+		}
+	}
+
	// Parse the OpenAI request using SDK types
	openAIRequest, err := parseOpenAIRequest(ctx.OriginalRequestBody)
	if err != nil {
		observability.Errorf("Error parsing OpenAI request: %v", err)
-		// Attempt to determine model for labeling (may be unknown here)
		metrics.RecordRequestError(ctx.RequestModel, "parse_error")
-		// Count this request as well, with unknown model if necessary
		metrics.RecordModelRequest(ctx.RequestModel)
		return nil, status.Errorf(codes.InvalidArgument, "invalid request body: %v", err)
	}
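Stripped of the Envoy ext_proc plumbing, the gate-and-rewrite logic reduces to the pattern below. This is a self-contained net/http sketch for orientation only: routerConfig, responsesGate, and the port are made up here, not taken from the commit.

package main

import (
	"log"
	"net/http"
	"strings"
)

type routerConfig struct {
	EnableResponsesAdapter bool
}

// responsesGate mirrors the headers-phase behavior: 404 when the flag is off,
// otherwise rewrite the path and let the chat-completions handler take over.
func responsesGate(cfg *routerConfig, chat http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method == http.MethodPost && strings.HasPrefix(r.URL.Path, "/v1/responses") {
			if cfg == nil || !cfg.EnableResponsesAdapter {
				http.Error(w, "Responses API not enabled", http.StatusNotFound)
				return
			}
			r.URL.Path = strings.Replace(r.URL.Path, "/v1/responses", "/v1/chat/completions", 1)
		}
		chat.ServeHTTP(w, r)
	})
}

func main() {
	chat := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"object":"chat.completion"}`)) // placeholder downstream
	})
	log.Fatal(http.ListenAndServe(":8080", responsesGate(&routerConfig{EnableResponsesAdapter: true}, chat)))
}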

src/semantic-router/pkg/extproc/response_handler.go

Lines changed: 21 additions & 0 deletions
@@ -211,6 +211,27 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_ResponseBody
		return response, nil
	}

+	// If this was a /v1/responses request (adapter path), remap the non-streaming body to Responses JSON
+	if r.Config != nil && r.Config.EnableResponsesAdapter {
+		if p, ok := ctx.Headers[":path"]; ok && strings.HasPrefix(p, "/v1/responses") {
+			mapped, err := mapChatCompletionToResponses(responseBody)
+			if err == nil {
+				// Replace the upstream JSON with Responses JSON
+				v.ResponseBody.Body = mapped
+				// Content-type remains application/json
+				return &ext_proc.ProcessingResponse{
+					Response: &ext_proc.ProcessingResponse_ResponseBody{
+						ResponseBody: &ext_proc.BodyResponse{
+							Response: &ext_proc.CommonResponse{
+								Status: ext_proc.CommonResponse_CONTINUE,
+							},
+						},
+					},
+				}, nil
+			}
+		}
+	}
+
	// Parse tokens from the response JSON using OpenAI SDK types
	var parsed openai.ChatCompletion
	if err := json.Unmarshal(responseBody, &parsed); err != nil {