Update prefix match plugin to implement PrepareData plugin

rahulgurnani · rahulgurnani · commit 7ae9f7ee8176 · 2025-11-11T19:50:54.000Z
diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go
@@ -57,6 +57,8 @@ const (
 	DefaultLRUCapacityPerServer = 31250
 
 	PrefixCachePluginType = "prefix-cache-scorer"
+
+	PrefixCacheMatchKey = "PrefixCacheMatchKey"
 )
 
 const (
@@ -195,17 +197,48 @@ func (p *Plugin) WithName(name string) *Plugin {
 	return p
 }
 
-// Score returns the scoring result for the given list of pods based on context.
-func (p *Plugin) Score(ctx context.Context, cycleState *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
+func (p *Plugin) Consumes() map[string]any {
+	return map[string]any{}
+}
+
+func (p *Plugin) Produces() map[string]any {
+	return map[string]any{
+		PrefixCacheMatchKey: &SchedulingContextState{},
+	}
+}
+
+func (p *Plugin) PrepareRequestData(ctx context.Context, request *types.LLMRequest, pods []types.Pod) {
 	// pre score step, hashing prompt and find longest prefix match.
 	hashes := hashPrompt(ctx, request, getBlockSize(pods, p.config.DefaultBlockSize), p.config.MaxPrefixBlocksToMatch)
 	state := &SchedulingContextState{
 		PrefixHashes:       hashes,
 		PrefixCacheServers: p.matchLongestPrefix(ctx, hashes),
 	}
 
-	cycleState.Write(plugins.StateKey(p.TypedName().String()), state)
+	// TODO: Store this in the pods' attribute map instead, to avoid global state in the plugin.
 	p.pluginState.Write(request.RequestId, plugins.StateKey(p.TypedName().String()), state)
+}
+
+// Score returns the scoring result for the given list of pods based on context.
+func (p *Plugin) Score(ctx context.Context, cycleState *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
+	// TODO(rahulgurnani): Remove duplication with PrepareRequestData after testing.
+	state, err := plugins.ReadPluginStateKey[*SchedulingContextState](p.pluginState, request.RequestId, plugins.StateKey(p.TypedName().String()))
+	if err != nil {
+		// This should not happen, but if it does, we recalculate the state.
+		// In unit tests it never happens because PrepareRequestData is always called before Score.
+		// TODO: When the prefix plugin is split into separate score and pre-request plugins,
+		// remove this recalculation.
+		log.FromContext(ctx).Error(err, "failed to read prefix plugin state, recalculating")
+		hashes := hashPrompt(ctx, request, getBlockSize(pods, p.config.DefaultBlockSize), p.config.MaxPrefixBlocksToMatch)
+		state = &SchedulingContextState{
+			PrefixHashes:       hashes,
+			PrefixCacheServers: p.matchLongestPrefix(ctx, hashes),
+		}
+		p.pluginState.Write(request.RequestId, plugins.StateKey(p.TypedName().String()), state)
+	}
+	// TODO(rahulgurnani): Clean up the cycleState write after all the changes are done; llm-d-scheduler appears to rely on cycleState presently.
+	cycleState.Write(plugins.StateKey(p.TypedName().String()), state)
+
 	log.FromContext(ctx).V(logutil.TRACE).Info("prefix cached state", "cached-servers", state.PrefixCacheServers, "hashes", state.PrefixHashes)
 	// calculate the scores of pods
 	scores := make(map[types.Pod]float64, len(pods))
diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
@@ -55,6 +55,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req1, pods)
 	scores := plugin.Score(context.Background(), types.NewCycleState(), req1, pods)
 	state, err := plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req1.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -87,6 +88,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req2, pods)
 	scores = plugin.Score(context.Background(), types.NewCycleState(), req2, pods)
 	state, err = plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req2.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -118,6 +120,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req3, pods)
 	scores = plugin.Score(context.Background(), types.NewCycleState(), req3, pods)
 	state, err = plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req3.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -148,6 +151,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req4, pods)
 	scores = plugin.Score(context.Background(), types.NewCycleState(), req4, pods)
 	state, err = plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req4.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -178,6 +182,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req5, pods)
 	scores = plugin.Score(context.Background(), types.NewCycleState(), req5, pods)
 	state, err = plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req5.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -223,6 +228,7 @@ func TestPrefixPluginChatCompletions(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req1, pods)
 	scores := plugin.Score(context.Background(), types.NewCycleState(), req1, pods)
 	state, err := plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req1.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -258,6 +264,7 @@ func TestPrefixPluginChatCompletionsGrowth(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req1, pods)
 	scores := plugin.Score(context.Background(), types.NewCycleState(), req1, pods)
 	state, err := plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req1.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -293,6 +300,7 @@ func TestPrefixPluginChatCompletionsGrowth(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req2, pods)
 	scores = plugin.Score(context.Background(), types.NewCycleState(), req2, pods)
 	state, err = plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req2.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -328,6 +336,7 @@ func TestPrefixPluginChatCompletionsGrowth(t *testing.T) {
 			},
 		},
 	}
+	plugin.PrepareRequestData(context.Background(), req3, pods)
 	scores = plugin.Score(context.Background(), types.NewCycleState(), req3, pods)
 	state, err = plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req3.RequestId, plugins.StateKey(plugin.TypedName().String()))
 	assert.NoError(t, err)
@@ -387,6 +396,7 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
 			}
 
 			b.ResetTimer()
+			plugin.PrepareRequestData(context.Background(), req, pods)
 			// Benchmark the scoring operation
 			scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
 			_ = scores // Use the result to prevent optimization
@@ -468,8 +478,9 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
 			}
 
 			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
+			for b.Loop() {
 				// Benchmark the scoring operation
+				plugin.PrepareRequestData(context.Background(), req, pods)
 				scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
 				_ = scores // Use the result to prevent optimization