Skip to content

Commit 5bd41ad

Browse files
authored
Fix prefix-cache-scorer benchmark panic (#1664)
* Fix prefix-cache-scorer benchmark panic
* fix lint
1 parent 637055b commit 5bd41ad

File tree

2 files changed

+37
-40
lines changed

2 files changed

+37
-40
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,10 @@ test-unit: ## Run unit tests.
144144
go tool cover -func=cover.out; \
145145
rm cover.out
146146

147+
.PHONY: test-benchmark
148+
test-benchmark: ## Run benchmarks.
149+
CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./pkg/... -bench=. -benchmem;
150+
147151
.PHONY: test-integration
148152
test-integration: envtest ## Run integration tests.
149153
CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go

Lines changed: 33 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ package prefix
1919
import (
2020
"context"
2121
"fmt"
22-
"math"
2322
"math/rand"
2423
"strings"
2524
"testing"
@@ -357,49 +356,44 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
357356
plugin := New(context.Background(), config)
358357
types.NewCycleState()
359358
var promptLen []int
360-
for i := 1; i <= 1024; i++ {
359+
for i := 1; i <= 1024; {
361360
promptLen = append(promptLen, i)
361+
i += 10
362362
}
363363
promptLen = append(promptLen, 2048, 4096, 8192, 10000, 20000, 50000)
364364

365-
for _, i := range promptLen {
366-
// Generate increasing-length random prompts
367-
prompt := randomPrompt(4 + i)
368-
pod := &types.PodMetrics{
369-
Pod: &backend.Pod{
370-
NamespacedName: k8stypes.NamespacedName{
371-
Name: fmt.Sprintf("random-pod-%d", i),
365+
for i, v := range promptLen {
366+
b.Run(fmt.Sprintf("messages_%d_length_%d", i, v), func(b *testing.B) {
367+
// Generate increasing-length random prompts
368+
prompt := randomPrompt(4 + v)
369+
pod := &types.PodMetrics{
370+
Pod: &backend.Pod{
371+
NamespacedName: k8stypes.NamespacedName{
372+
Name: fmt.Sprintf("random-pod-%d", v),
373+
},
372374
},
373-
},
374-
}
375-
376-
pods := []types.Pod{pod}
377-
req := &types.LLMRequest{
378-
RequestId: uuid.NewString(),
379-
TargetModel: "model-stress",
380-
Body: &types.LLMRequestBody{
381-
Completions: &types.CompletionsRequest{
382-
Prompt: prompt,
375+
}
376+
377+
pods := []types.Pod{pod}
378+
req := &types.LLMRequest{
379+
RequestId: uuid.NewString(),
380+
TargetModel: "model-stress",
381+
Body: &types.LLMRequestBody{
382+
Completions: &types.CompletionsRequest{
383+
Prompt: prompt,
384+
},
383385
},
384-
},
385-
}
386-
387-
// First cycle: simulate scheduling and insert prefix info into the cache
388-
plugin.Score(context.Background(), types.NewCycleState(), req, pods)
389-
schedulingResult := &types.SchedulingResult{
390-
PrimaryProfileName: "default",
391-
ProfileResults: map[string]*types.ProfileRunResult{
392-
"default": {TargetPods: []types.Pod{pod}},
393-
},
394-
}
395-
plugin.PreRequest(context.Background(), req, schedulingResult, 0)
396-
plugin.wg.Wait()
397-
398-
// Second cycle: validate internal state
399-
state, err := plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req.RequestId, plugins.StateKey(plugin.TypedName().String()))
400-
assert.NoError(b, err)
401-
expectedHashes := int(math.Min(float64(maxPrefixBlocks), float64(len(req.Body.Completions.Prompt)/blockSize)))
402-
assert.Equal(b, expectedHashes, len(state.PrefixHashes), "number of hashes is incorrect")
386+
}
387+
388+
b.ResetTimer()
389+
// Benchmark the scoring operation
390+
scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
391+
_ = scores // Use the result to prevent optimization
392+
393+
// Clean up state for next iteration
394+
plugin.pluginState.Delete(req.RequestId)
395+
})
396+
403397
}
404398
}
405399

@@ -422,7 +416,6 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
422416
MaxPrefixBlocksToMatch: maxPrefixBlocks,
423417
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
424418
}
425-
426419
plugin := New(context.Background(), config)
427420

428421
// Test scenarios: varying number of messages and message lengths
@@ -476,7 +469,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
476469
b.ResetTimer()
477470
for i := 0; i < b.N; i++ {
478471
// Benchmark the scoring operation
479-
scores := plugin.Score(context.Background(), nil, req, pods)
472+
scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
480473
_ = scores // Use the result to prevent optimization
481474

482475
// Clean up state for next iteration

0 commit comments

Comments
 (0)