Skip to content

Commit e3021c7

Browse files
committed
Address review comments
1 parent d6c81d9 commit e3021c7

File tree

4 files changed

+12
-24
lines changed

4 files changed

+12
-24
lines changed

pkg/epp/requestcontrol/director.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"strings"
2727
"time"
2828

29-
"github.com/go-logr/logr"
3029
"sigs.k8s.io/controller-runtime/pkg/log"
3130

3231
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
@@ -92,10 +91,10 @@ type Director struct {
9291
}
9392

9493
// getInferenceObjective fetches the inferenceObjective from the datastore otherwise creates a new one based on reqCtx.
95-
func (d *Director) getInferenceObjective(logger logr.Logger, reqCtx *handlers.RequestContext) *v1alpha2.InferenceObjective {
94+
func (d *Director) getInferenceObjective(ctx context.Context, reqCtx *handlers.RequestContext) *v1alpha2.InferenceObjective {
9695
infObjective := d.datastore.ObjectiveGet(reqCtx.ObjectiveKey)
9796
if infObjective == nil {
98-
logger.V(logutil.VERBOSE).Info("No associated InferenceObjective found, using default", "objectiveKey", reqCtx.ObjectiveKey)
97+
log.FromContext(ctx).V(logutil.VERBOSE).Info("No associated InferenceObjective found, using default", "objectiveKey", reqCtx.ObjectiveKey)
9998
infObjective = &v1alpha2.InferenceObjective{
10099
Spec: v1alpha2.InferenceObjectiveSpec{
101100
Priority: &d.defaultPriority,
@@ -108,8 +107,7 @@ func (d *Director) getInferenceObjective(logger logr.Logger, reqCtx *handlers.Re
108107
return infObjective
109108
}
110109

111-
// resolveTargetModel is a helper that resolves targetModel
112-
// and updates the reqCtx.
110+
// resolveTargetModel is a helper to update reqCtx with target model based on request.
113111
func (d *Director) resolveTargetModel(reqCtx *handlers.RequestContext) error {
114112
requestBodyMap := reqCtx.Request.Body
115113
var ok bool
@@ -143,7 +141,7 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
143141
}
144142

145143
// Parse inference objective.
146-
infObjective := d.getInferenceObjective(logger, reqCtx)
144+
infObjective := d.getInferenceObjective(ctx, reqCtx)
147145

148146
// Prepare LLMRequest (needed for both saturation detection and Scheduler)
149147
reqCtx.SchedulingRequest = &schedulingtypes.LLMRequest{
@@ -163,7 +161,7 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
163161
if len(candidatePods) == 0 {
164162
return reqCtx, errutil.Error{Code: errutil.ServiceUnavailable, Msg: "failed to find candidate pods for serving the request"}
165163
}
166-
164+
// TODO(rahulgurnani/lukevandrie): Perhaps, refactor/implement Admit plugin for Admission control.
167165
if err := d.admissionController.Admit(ctx, reqCtx, candidatePods, *infObjective.Spec.Priority); err != nil {
168166
logger.V(logutil.DEFAULT).Info("Request rejected by admission control", "error", err)
169167
return reqCtx, err
@@ -360,7 +358,8 @@ func (d *Director) runAdmitRequestPlugins(ctx context.Context,
360358
loggerDebug := log.FromContext(ctx).V(logutil.DEBUG)
361359
for _, plugin := range d.requestControlPlugins.admitRequestPlugins {
362360
loggerDebug.Info("Running AdmitRequest plugin", "plugin", plugin.TypedName())
363-
if !plugin.Admit(ctx, request, pods) {
361+
if denyReason := plugin.Admit(ctx, request, pods); denyReason != "" {
362+
loggerDebug.Info("AdmitRequest plugin denied the request", "plugin", plugin.TypedName(), "reason", denyReason)
364363
return false
365364
}
366365
loggerDebug.Info("Completed running AdmitRequest plugin successfully", "plugin", plugin.TypedName())

pkg/epp/requestcontrol/director_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ func (m *mockAdmitRequestPlugins) TypedName() plugins.TypedName {
127127
return m.tn
128128
}
129129

130-
func (m *mockAdmitRequestPlugins) Admit(ctx context.Context, request *schedulingtypes.LLMRequest, pods []schedulingtypes.Pod) bool {
130+
func (m *mockAdmitRequestPlugins) Admit(ctx context.Context, request *schedulingtypes.LLMRequest, pods []schedulingtypes.Pod) string {
131131
m.admitRequestCalled = true
132-
return true
132+
return ""
133133
}
134134

135135
func TestDirector_HandleRequest(t *testing.T) {

pkg/epp/requestcontrol/plugins.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,6 @@ type PrepareData interface {
6969
// AdmitRequest plugin is implemented by plugins for admission control. These plugins need to implement Admit method.
7070
type AdmitRequest interface {
7171
plugins.Plugin
72-
Admit(ctx context.Context, request *types.LLMRequest, pods []types.Pod) bool
72+
// Admit returns the denial reason if the request is denied. If the request is allowed, it returns an empty string.
73+
Admit(ctx context.Context, request *types.LLMRequest, pods []types.Pod) string
7374
}

pkg/epp/scheduling/types/types.go

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ type Cloneable interface {
254254

255255
// AttributeMap is used to store flexible metadata or traits
256256
// across different aspects of an inference server.
257-
// Stored values must be Cloneable.
257+
// Stored values must be Cloneable. This is a per-request snapshot of the attributes.
258258
type AttributeMap interface {
259259
Put(string, Cloneable)
260260
Get(string) (Cloneable, bool)
@@ -290,18 +290,6 @@ func (a *Attributes) Get(key string) (Cloneable, bool) {
290290
return nil, false
291291
}
292292

293-
// Keys returns all keys in the attribute map.
294-
func (a *Attributes) Keys() []string {
295-
var keys []string
296-
a.data.Range(func(key, _ any) bool {
297-
if sk, ok := key.(string); ok {
298-
keys = append(keys, sk)
299-
}
300-
return true
301-
})
302-
return keys
303-
}
304-
305293
// Clone creates a deep copy of the entire attribute map.
306294
func (a *Attributes) Clone() *Attributes {
307295
clone := NewAttributes()

0 commit comments

Comments
 (0)