Skip to content

Commit 2366f0c

Browse files
danehans authored and BenjaminBraunDev committed
Conformance: Adds Weight-Based Traffic Splitting Test (kubernetes-sigs#1669)
* Conformance: Adds Weight-Based Traffic Splitting Test Signed-off-by: Daneyon Hansen <daneyon.hansen@solo.io> * Resolves zetxqx review feedback Signed-off-by: Daneyon Hansen <daneyon.hansen@solo.io> --------- Signed-off-by: Daneyon Hansen <daneyon.hansen@solo.io>
1 parent 9de0af6 commit 2366f0c

File tree

2 files changed

+251
-0
lines changed

2 files changed

+251
-0
lines changed
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package tests
18+
19+
import (
20+
"fmt"
21+
"math"
22+
"net/http"
23+
"strings"
24+
"sync/atomic"
25+
"testing"
26+
27+
"github.com/stretchr/testify/require"
28+
"golang.org/x/sync/errgroup"
29+
"k8s.io/apimachinery/pkg/types"
30+
gwhttp "sigs.k8s.io/gateway-api/conformance/utils/http"
31+
"sigs.k8s.io/gateway-api/conformance/utils/suite"
32+
"sigs.k8s.io/gateway-api/pkg/features"
33+
34+
"sigs.k8s.io/gateway-api-inference-extension/conformance/resources"
35+
k8sutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/kubernetes"
36+
"sigs.k8s.io/gateway-api-inference-extension/conformance/utils/traffic"
37+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/test"
38+
)
39+
40+
func init() {
41+
ConformanceTests = append(ConformanceTests, GatewayWeightedAcrossTwoInferencePools)
42+
}
43+
44+
// GatewayWeightedAcrossTwoInferencePools verifies that Gateway splits traffic across two
45+
// InferencePools according to backendRef weights, and that each request is routed to an
46+
// endpoint of the selected InferencePool.
47+
var GatewayWeightedAcrossTwoInferencePools = suite.ConformanceTest{
48+
ShortName: "GatewayWeightedAcrossTwoInferencePools",
49+
Description: "Gateway should split traffic across two InferencePools based on backendRef weights and route only to endpoints of the selected InferencePool",
50+
Manifests: []string{"tests/gateway_weighted_two_pools.yaml"},
51+
Features: []features.FeatureName{
52+
features.SupportGateway,
53+
features.FeatureName("SupportInferencePool"),
54+
},
55+
Test: func(t *testing.T, s *suite.ConformanceTestSuite) {
56+
const (
57+
hostname = "primary.example.com"
58+
path = "/weighted-two-pools-test"
59+
60+
// Sample size so the weight signal dominates random noise.
61+
totalRequests = 200
62+
concurrentRequests = 5
63+
64+
// These route weights must match the test manifest.
65+
primaryWeight = 70
66+
secondaryWeight = 30
67+
)
68+
69+
// Objects under test.
70+
httpRouteNN := types.NamespacedName{Name: "httproute-weighted-two-pools", Namespace: resources.AppBackendNamespace}
71+
gatewayNN := resources.PrimaryGatewayNN
72+
primaryPoolNN := resources.PrimaryInferencePoolNN
73+
secondaryPoolNN := types.NamespacedName{Name: "secondary-inference-pool", Namespace: resources.AppBackendNamespace}
74+
75+
// Labels for the two deployments defined in base.yaml.
76+
primaryLabels := map[string]string{"app": "primary-inference-model-server"}
77+
secondaryLabels := map[string]string{"app": "secondary-inference-model-server"}
78+
79+
t.Log("Verifying HTTPRoute and both InferencePools are accepted and the Gateway has an address.")
80+
k8sutils.HTTPRouteMustBeAcceptedAndResolved(t, s.Client, s.TimeoutConfig, httpRouteNN, gatewayNN)
81+
k8sutils.InferencePoolMustBeAcceptedByParent(t, s.Client, primaryPoolNN, gatewayNN)
82+
k8sutils.InferencePoolMustBeAcceptedByParent(t, s.Client, secondaryPoolNN, gatewayNN)
83+
gwAddr := k8sutils.GetGatewayEndpoint(t, s.Client, s.TimeoutConfig, gatewayNN)
84+
85+
// Discover pods for each pool and build quick lookup sets.
86+
t.Logf("Fetching primary backend pods with labels: %v", primaryLabels)
87+
primaryPods, err := k8sutils.GetPodsWithLabel(t, s.Client, resources.AppBackendNamespace, primaryLabels, s.TimeoutConfig)
88+
require.NoError(t, err)
89+
require.Len(t, primaryPods, 3) // base.yaml uses 3 replicas
90+
91+
t.Logf("Fetching secondary backend pods with labels: %v", secondaryLabels)
92+
secondaryPods, err := k8sutils.GetPodsWithLabel(t, s.Client, resources.AppBackendNamespace, secondaryLabels, s.TimeoutConfig)
93+
require.NoError(t, err)
94+
require.Len(t, secondaryPods, 3) // base.yaml uses 3 replicas
95+
96+
primaryPodNames := make([]string, 0, len(primaryPods))
97+
primaryPodIPs := make([]string, 0, len(primaryPods))
98+
for _, p := range primaryPods {
99+
require.NotEmpty(t, p.Status.PodIP, "primary pod %s has no IP yet", p.Name)
100+
primaryPodNames = append(primaryPodNames, p.Name)
101+
primaryPodIPs = append(primaryPodIPs, p.Status.PodIP)
102+
}
103+
104+
secondaryPodNames := make([]string, 0, len(secondaryPods))
105+
secondaryPodIPs := make([]string, 0, len(secondaryPods))
106+
for _, p := range secondaryPods {
107+
require.NotEmpty(t, p.Status.PodIP, "secondary pod %s has no IP yet", p.Name)
108+
secondaryPodNames = append(secondaryPodNames, p.Name)
109+
secondaryPodIPs = append(secondaryPodIPs, p.Status.PodIP)
110+
}
111+
112+
// Send one targeted request per backend Pod to ensure EPP readiness.
113+
allIPs := append(append([]string{}, primaryPodIPs...), secondaryPodIPs...)
114+
allNames := append(append([]string{}, primaryPodNames...), secondaryPodNames...)
115+
for i := 0; i < len(allIPs); i++ {
116+
traffic.MakeRequestAndExpectSuccess(
117+
t,
118+
s.RoundTripper,
119+
s.TimeoutConfig,
120+
gwAddr,
121+
traffic.Request{
122+
Host: hostname,
123+
Path: path,
124+
Headers: map[string]string{
125+
test.HeaderTestEppEndPointSelectionKey: allIPs[i],
126+
},
127+
Method: http.MethodPost,
128+
Body: `{"model":"conformance-fake-model","prompt":"Warmup"}`,
129+
Backend: allNames[i],
130+
Namespace: resources.AppBackendNamespace,
131+
},
132+
)
133+
}
134+
135+
// Provide a union list of eligible endpoints for the test. Each pool's EPP
136+
// should filter to endpoints that actually belong to its pool.
137+
eppHeaderValue := strings.Join(allIPs, ",")
138+
139+
requestBody := `{
140+
"model": "conformance-fake-model",
141+
"prompt": "Write as if you were a critic: San Francisco"
142+
}`
143+
144+
// Build quick lookup sets for attributing each hit to a pool by backend pod name.
145+
primarySet := make(map[string]struct{}, len(primaryPodNames))
146+
for _, n := range primaryPodNames {
147+
primarySet[n] = struct{}{}
148+
}
149+
secondarySet := make(map[string]struct{}, len(secondaryPodNames))
150+
for _, n := range secondaryPodNames {
151+
secondarySet[n] = struct{}{}
152+
}
153+
154+
headers := map[string]string{
155+
test.HeaderTestEppEndPointSelectionKey: eppHeaderValue,
156+
}
157+
expected := gwhttp.ExpectedResponse{
158+
Request: gwhttp.Request{
159+
Host: hostname,
160+
Path: path,
161+
Method: http.MethodPost,
162+
Headers: headers,
163+
},
164+
Response: gwhttp.Response{
165+
StatusCode: http.StatusOK,
166+
},
167+
Namespace: resources.AppBackendNamespace,
168+
}
169+
req := gwhttp.MakeRequest(t, &expected, gwAddr, "HTTP", "http")
170+
171+
var primaryHits, secondaryHits atomic.Int64
172+
var g errgroup.Group
173+
g.SetLimit(concurrentRequests)
174+
175+
for i := 0; i < totalRequests; i++ {
176+
g.Go(func() error {
177+
cReq, cRes, err := traffic.MakeCallRoundTripper(t, s.RoundTripper, &traffic.RequestWithBody{
178+
Request: req,
179+
Body: strings.NewReader(requestBody),
180+
})
181+
if err != nil {
182+
return fmt.Errorf("failed to roundtrip request: %w", err)
183+
}
184+
if err := gwhttp.CompareRoundTrip(t, &req, cReq, cRes, expected); err != nil {
185+
return fmt.Errorf("response expectation failed: %w", err)
186+
}
187+
188+
// Attribute response to pool by backend pod name.
189+
if _, ok := primarySet[cReq.Pod]; ok {
190+
primaryHits.Add(1)
191+
} else if _, ok := secondarySet[cReq.Pod]; ok {
192+
secondaryHits.Add(1)
193+
} else {
194+
return fmt.Errorf("request was handled by unexpected pod %q (not in either pool)", cReq.Pod)
195+
}
196+
return nil
197+
})
198+
}
199+
require.NoError(t, g.Wait(), "requests failed")
200+
201+
ph := float64(primaryHits.Load())
202+
sh := float64(secondaryHits.Load())
203+
total := ph + sh
204+
require.Equal(t, int64(totalRequests), int64(total), "sum of hits must equal number of attempts")
205+
require.Greater(t, total, 0.0)
206+
207+
observedPrimary := ph / total
208+
expectedPrimary := float64(primaryWeight) / float64(primaryWeight+secondaryWeight)
209+
210+
// Allow either a 10 percentage-point absolute error, or a 3-sigma binomial CI.
211+
sigma := math.Sqrt(expectedPrimary * (1.0 - expectedPrimary) / total)
212+
absTolerance := math.Max(0.10, 3.0*sigma)
213+
214+
diff := math.Abs(observedPrimary - expectedPrimary)
215+
require.LessOrEqualf(t, diff, absTolerance,
216+
"weighted split out of bounds: observed primary=%.3f (hits=%d/%d), expected=%.3f, tolerance=±%.3f",
217+
observedPrimary, int64(ph), int64(total), expectedPrimary, absTolerance)
218+
t.Logf("Weighted split OK: primary=%.3f (hits=%d/%d), expected=%.3f, tolerance=±%.3f; secondary hits=%d",
219+
observedPrimary, int64(ph), int64(total), expectedPrimary, absTolerance, int64(sh))
220+
},
221+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# HTTPRoute for the weighted two-pool conformance test. It attaches to the
# "http" listener of the conformance-primary Gateway, matches host
# primary.example.com with path prefix /weighted-two-pools-test, and splits
# traffic between two InferencePools by backendRef weight.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: httproute-weighted-two-pools
  namespace: gateway-conformance-app-backend
spec:
  parentRefs:
  - group: gateway.networking.k8s.io
    kind: Gateway
    name: conformance-primary
    namespace: gateway-conformance-infra
    sectionName: http
  hostnames:
  - "primary.example.com"
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /weighted-two-pools-test
    backendRefs:
    # Primary pool: weight 70 out of a total of 100.
    - group: inference.networking.k8s.io
      kind: InferencePool
      name: primary-inference-pool
      weight: 70
    # Secondary pool: weight 30 out of a total of 100.
    - group: inference.networking.k8s.io
      kind: InferencePool
      name: secondary-inference-pool
      weight: 30

0 commit comments

Comments
 (0)