Commit 877729f

response mapping init

Signed-off-by: JaredforReal <w13431838023@gmail.com>
1 parent 9cda929 commit 877729f

File tree

8 files changed: +380 −79 lines

config/config.development.yaml

Lines changed: 11 additions & 8 deletions
@@ -60,6 +60,9 @@ categories:

 default_model: test-model

+# Enable OpenAI Responses API adapter (experimental)
+enable_responses_adapter: true
+
 # Auto model name for automatic model selection (optional)
 # Uncomment and set to customize the model name for automatic routing
 # auto_model_name: "MoM"

@@ -75,31 +78,31 @@ observability:
   tracing:
     # Enable tracing for development/debugging
     enabled: true
-
+
     # OpenTelemetry provider
     provider: "opentelemetry"
-
+
     exporter:
       # Stdout exporter prints traces to console (great for debugging)
       type: "stdout"
-
+
       # No endpoint needed for stdout
       # endpoint: ""
       # insecure: true
-
+
     sampling:
       # Always sample in development to see all traces
       type: "always_on"
-
+
      # Rate not used for always_on
      # rate: 1.0
-
+
    resource:
      # Service name for trace identification
      service_name: "vllm-semantic-router-dev"
-
+
      # Version for development
      service_version: "dev"
-
+
      # Environment identifier
      deployment_environment: "development"

(The bare -/+ pairs above are whitespace-only changes to blank lines.)

config/config.yaml

Lines changed: 3 additions & 0 deletions
@@ -245,6 +245,9 @@ reasoning_families:
 # Global default reasoning effort level
 default_reasoning_effort: high

+# Enable OpenAI Responses API adapter (experimental)
+enable_responses_adapter: false
+
 # API Configuration
 api:
   batch_classification:
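Usage sketch (illustrative, not part of this commit): with the flag enabled, a client posts to /v1/responses instead of /v1/chat/completions. The address below is a placeholder for wherever the router listens, and the payload uses only the fields PR1 supports.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Text-only Responses payload; exactly the shape the PR1 adapter accepts
	payload := []byte(`{"model":"test-model","input":"Hello world","max_output_tokens":128}`)

	// Placeholder address: substitute your deployment's listener
	resp, err := http.Post("http://localhost:8801/v1/responses", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(body)) // expect {"object":"response",...} when the adapter is enabled
}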

src/semantic-router/pkg/config/config.go

Lines changed: 4 additions & 0 deletions
@@ -131,6 +131,10 @@ type RouterConfig struct {

	// Gateway route cache clearing
	ClearRouteCache bool `yaml:"clear_route_cache"`
+
+	// EnableResponsesAdapter enables the compatibility shim for the OpenAI Responses API (/v1/responses).
+	// When enabled, POST /v1/responses requests are adapted to legacy /v1/chat/completions.
+	EnableResponsesAdapter bool `yaml:"enable_responses_adapter"`
 }

// APIConfig represents configuration for API endpoints
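The yaml tag means the new flag is picked up wherever RouterConfig is unmarshaled from the config file. A minimal sketch of that round trip, assuming gopkg.in/yaml.v3 and a trimmed-down struct (the project's actual loader may differ):

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Stand-in for the real RouterConfig; only the new field is shown
type RouterConfig struct {
	EnableResponsesAdapter bool `yaml:"enable_responses_adapter"`
}

func main() {
	raw := []byte("enable_responses_adapter: true\n")

	var cfg RouterConfig
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.EnableResponsesAdapter) // true
}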
Lines changed: 146 additions & 0 deletions
@@ -0,0 +1,146 @@
package extproc

import (
	"encoding/json"
	"fmt"
	"strings"
)

// mapResponsesRequestToChatCompletions converts a minimal OpenAI Responses API request
// into a legacy Chat Completions request JSON. Supports only text input for PR1.
func mapResponsesRequestToChatCompletions(original []byte) ([]byte, error) {
	var req map[string]interface{}
	if err := json.Unmarshal(original, &req); err != nil {
		return nil, err
	}

	// Extract model
	model, _ := req["model"].(string)
	if model == "" {
		return nil, fmt.Errorf("missing model")
	}

	// Derive user content
	var userContent string
	if input, ok := req["input"]; ok {
		switch v := input.(type) {
		case string:
			userContent = v
		case []interface{}:
			// Join any string elements; ignore non-string for now
			var parts []string
			for _, it := range v {
				if s, ok := it.(string); ok {
					parts = append(parts, s)
				} else if m, ok := it.(map[string]interface{}); ok {
					// Try common shapes: {type:"input_text"|"text", text:"..."}
					if t, _ := m["type"].(string); t == "input_text" || t == "text" {
						if txt, _ := m["text"].(string); txt != "" {
							parts = append(parts, txt)
						}
					}
				}
			}
			userContent = strings.TrimSpace(strings.Join(parts, " "))
		default:
			// Unsupported multimodal input
			return nil, fmt.Errorf("unsupported input type")
		}
	} else if msgs, ok := req["messages"].([]interface{}); ok {
		// Fallback: if the caller already provided messages, pass them through.
		// This enables easy migration from chat/completions.
		mapped := map[string]interface{}{
			"model":    model,
			"messages": msgs,
		}
		// Map basic params
		if v, ok := req["temperature"]; ok {
			mapped["temperature"] = v
		}
		if v, ok := req["top_p"]; ok {
			mapped["top_p"] = v
		}
		if v, ok := req["max_output_tokens"]; ok {
			mapped["max_tokens"] = v
		}
		return json.Marshal(mapped)
	}

	if userContent == "" {
		return nil, fmt.Errorf("empty input")
	}

	// Build minimal Chat Completions request
	mapped := map[string]interface{}{
		"model": model,
		"messages": []map[string]interface{}{
			{"role": "user", "content": userContent},
		},
	}
	// Map basic params
	if v, ok := req["temperature"]; ok {
		mapped["temperature"] = v
	}
	if v, ok := req["top_p"]; ok {
		mapped["top_p"] = v
	}
	if v, ok := req["max_output_tokens"]; ok {
		mapped["max_tokens"] = v
	}

	return json.Marshal(mapped)
}

// mapChatCompletionToResponses converts an OpenAI ChatCompletion JSON
// into a minimal Responses API JSON (non-streaming only) for PR1.
func mapChatCompletionToResponses(chatCompletionJSON []byte) ([]byte, error) {
	var parsed struct {
		ID      string `json:"id"`
		Object  string `json:"object"`
		Created int64  `json:"created"`
		Model   string `json:"model"`
		Choices []struct {
			Index        int    `json:"index"`
			FinishReason string `json:"finish_reason"`
			Message      struct {
				Role    string `json:"role"`
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
		Usage struct {
			PromptTokens     int `json:"prompt_tokens"`
			CompletionTokens int `json:"completion_tokens"`
			TotalTokens      int `json:"total_tokens"`
		} `json:"usage"`
	}
	if err := json.Unmarshal(chatCompletionJSON, &parsed); err != nil {
		return nil, err
	}

	content := ""
	stopReason := "stop"
	if len(parsed.Choices) > 0 {
		content = parsed.Choices[0].Message.Content
		if parsed.Choices[0].FinishReason != "" {
			stopReason = parsed.Choices[0].FinishReason
		}
	}

	out := map[string]interface{}{
		"id":      parsed.ID,
		"object":  "response",
		"created": parsed.Created,
		"model":   parsed.Model,
		"output": []map[string]interface{}{
			{"type": "message", "role": "assistant", "content": content},
		},
		"stop_reason": stopReason,
		"usage": map[string]int{
			"input_tokens":  parsed.Usage.PromptTokens,
			"output_tokens": parsed.Usage.CompletionTokens,
			"total_tokens":  parsed.Usage.TotalTokens,
		},
	}

	return json.Marshal(out)
}
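The tests in the next file cover the plain string input; a hypothetical extra test (not in this commit) would exercise the structured input-array shape the mapper also accepts, where string elements and {type:"input_text", text:"..."} parts are joined with spaces:

package extproc

import (
	"encoding/json"
	"testing"
)

func TestMapResponsesRequestToChatCompletions_StructuredInput(t *testing.T) {
	// "input" mixes a bare string with an {type:"input_text"} part
	in := []byte(`{"model":"gpt-test","input":["Hello",{"type":"input_text","text":"world"}]}`)
	out, err := mapResponsesRequestToChatCompletions(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	msgs, ok := m["messages"].([]interface{})
	if !ok || len(msgs) != 1 {
		t.Fatalf("unexpected messages: %#v", m["messages"])
	}
	content, _ := msgs[0].(map[string]interface{})["content"].(string)
	if content != "Hello world" {
		t.Fatalf("want %q, got %q", "Hello world", content)
	}
}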
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
package extproc

import (
	"encoding/json"
	"testing"
)

func TestMapResponsesRequestToChatCompletions_TextInput(t *testing.T) {
	in := []byte(`{"model":"gpt-test","input":"Hello world","temperature":0.2,"top_p":0.9,"max_output_tokens":128}`)
	out, err := mapResponsesRequestToChatCompletions(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	if m["model"].(string) != "gpt-test" {
		t.Fatalf("model not mapped")
	}
	if _, ok := m["messages"].([]interface{}); !ok {
		t.Fatalf("messages missing")
	}
}

func TestMapChatCompletionToResponses_Minimal(t *testing.T) {
	in := []byte(`{
		"id":"chatcmpl-1","object":"chat.completion","created":123,"model":"gpt-test",
		"choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"hi"}}],
		"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}
	}`)
	out, err := mapChatCompletionToResponses(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	if m["object"].(string) != "response" {
		t.Fatalf("object not 'response'")
	}
	if m["stop_reason"].(string) == "" {
		t.Fatalf("stop_reason missing")
	}
}

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 56 additions & 2 deletions
@@ -329,6 +329,45 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_RequestHeaders
		return r.handleModelsRequest(path)
	}

+	// Responses adapter: detect POST /v1/responses and gate by feature flag
+	if method == "POST" && strings.HasPrefix(path, "/v1/responses") {
+		if r.Config == nil || !r.Config.EnableResponsesAdapter {
+			observability.Warnf("/v1/responses requested but adapter disabled")
+			return r.createErrorResponse(404, "Responses API not enabled"), nil
+		}
+
+		// Prepare header mutation to rewrite :path to legacy chat completions;
+		// the actual body mapping occurs in handleRequestBody
+		newPath := strings.Replace(path, "/v1/responses", "/v1/chat/completions", 1)
+
+		headerMutation := &ext_proc.HeaderMutation{
+			// Remove content-length because the body will be mutated later
+			RemoveHeaders: []string{"content-length"},
+			SetHeaders: []*core.HeaderValueOption{
+				{
+					Header: &core.HeaderValue{
+						Key:      ":path",
+						RawValue: []byte(newPath),
+					},
+				},
+			},
+		}
+
+		response := &ext_proc.ProcessingResponse{
+			Response: &ext_proc.ProcessingResponse_RequestHeaders{
+				RequestHeaders: &ext_proc.HeadersResponse{
+					Response: &ext_proc.CommonResponse{
+						Status:         ext_proc.CommonResponse_CONTINUE,
+						HeaderMutation: headerMutation,
+					},
+				},
+			},
+		}
+
+		observability.Infof("Rewriting /v1/responses to %s (headers phase)", newPath)
+		return response, nil
+	}
+
	// Prepare base response
	response := &ext_proc.ProcessingResponse{
		Response: &ext_proc.ProcessingResponse_RequestHeaders{

@@ -363,13 +402,28 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBody
		ctx.ExpectStreamingResponse = true // Set this if stream param is found
	}

+	// If the path was /v1/responses and the adapter is enabled, map the request JSON to ChatCompletion
+	if r.Config != nil && r.Config.EnableResponsesAdapter {
+		if p, ok := ctx.Headers[":path"]; ok && strings.HasPrefix(p, "/v1/responses") {
+			mapped, err := mapResponsesRequestToChatCompletions(ctx.OriginalRequestBody)
+			if err != nil {
+				observability.Errorf("Responses→Chat mapping failed: %v", err)
+				metrics.RecordRequestError(ctx.RequestModel, "parse_error")
+				return r.createErrorResponse(400, "Invalid /v1/responses payload"), nil
+			}
+
+			// Replace the original body with the mapped body for downstream processing
+			ctx.OriginalRequestBody = mapped
+
+			// No-op for the Accept header here; downstream content negotiation remains unchanged
+		}
+	}
+
	// Parse the OpenAI request using SDK types
	openAIRequest, err := parseOpenAIRequest(ctx.OriginalRequestBody)
	if err != nil {
		observability.Errorf("Error parsing OpenAI request: %v", err)
-		// Attempt to determine model for labeling (may be unknown here)
		metrics.RecordRequestError(ctx.RequestModel, "parse_error")
-		// Count this request as well, with unknown model if necessary
		metrics.RecordModelRequest(ctx.RequestModel)
		return nil, status.Errorf(codes.InvalidArgument, "invalid request body: %v", err)
	}
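Stripped of the Envoy ext_proc plumbing, the gate-and-rewrite logic reduces to the pattern below. This is a self-contained net/http sketch for orientation only: routerConfig, responsesGate, and the port are made up here, not taken from the commit.

package main

import (
	"log"
	"net/http"
	"strings"
)

type routerConfig struct {
	EnableResponsesAdapter bool
}

// responsesGate mirrors the headers-phase behavior: 404 when the flag is off,
// otherwise rewrite the path and let the chat-completions handler take over.
func responsesGate(cfg *routerConfig, chat http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method == http.MethodPost && strings.HasPrefix(r.URL.Path, "/v1/responses") {
			if cfg == nil || !cfg.EnableResponsesAdapter {
				http.Error(w, "Responses API not enabled", http.StatusNotFound)
				return
			}
			r.URL.Path = strings.Replace(r.URL.Path, "/v1/responses", "/v1/chat/completions", 1)
		}
		chat.ServeHTTP(w, r)
	})
}

func main() {
	chat := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"object":"chat.completion"}`)) // placeholder downstream
	})
	log.Fatal(http.ListenAndServe(":8080", responsesGate(&routerConfig{EnableResponsesAdapter: true}, chat)))
}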

src/semantic-router/pkg/extproc/response_handler.go

Lines changed: 21 additions & 0 deletions
@@ -211,6 +211,27 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_ResponseBody
		return response, nil
	}

+	// If this was a /v1/responses request (adapter path), remap the non-streaming body to Responses JSON
+	if r.Config != nil && r.Config.EnableResponsesAdapter {
+		if p, ok := ctx.Headers[":path"]; ok && strings.HasPrefix(p, "/v1/responses") {
+			mapped, err := mapChatCompletionToResponses(responseBody)
+			if err == nil {
+				// Replace the upstream JSON with Responses JSON
+				v.ResponseBody.Body = mapped
+				// Content-type remains application/json
+				return &ext_proc.ProcessingResponse{
+					Response: &ext_proc.ProcessingResponse_ResponseBody{
+						ResponseBody: &ext_proc.BodyResponse{
+							Response: &ext_proc.CommonResponse{
+								Status: ext_proc.CommonResponse_CONTINUE,
+							},
+						},
+					},
+				}, nil
+			}
+		}
+	}
+
	// Parse tokens from the response JSON using OpenAI SDK types
	var parsed openai.ChatCompletion
	if err := json.Unmarshal(responseBody, &parsed); err != nil {