@@ -62,12 +62,11 @@ func NewDirectorWithConfig(
6262 config * Config ,
6363) * Director {
6464 return & Director {
65- datastore : datastore ,
66- scheduler : scheduler ,
67- admissionController : admissionController ,
68- preRequestPlugins : config .preRequestPlugins ,
69- postResponsePlugins : config .postResponsePlugins ,
70- defaultPriority : 0 , // define default priority explicitly
65+ datastore : datastore ,
66+ scheduler : scheduler ,
67+ admissionController : admissionController ,
68+ requestControlPlugins : * config ,
69+ defaultPriority : 0 , // define default priority explicitly
7170 }
7271}
7372
@@ -81,11 +80,10 @@ func NewDirectorWithConfig(
8180// - Preparing the request context for the Envoy ext_proc filter to route the request.
8281// - Running ResponseReceived, ResponseStreaming, and ResponseComplete plugins.
8382type Director struct {
84- datastore Datastore
85- scheduler Scheduler
86- admissionController AdmissionController
87- preRequestPlugins []PreRequest
88- postResponsePlugins []PostResponse
83+ datastore Datastore
84+ scheduler Scheduler
85+ admissionController AdmissionController
86+ requestControlPlugins Config
8987 // we just need a pointer to an int variable since priority is a pointer in InferenceObjective
9088 // no need to set this in the constructor, since the value we want is the default int val
9189 // and value types cannot be nil
@@ -261,19 +259,49 @@ func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []sch
261259 return pm
262260}
263261
264- func (d * Director ) HandleResponse (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
262+ // HandleResponseReceived is called when the response headers are received.
263+ func (d * Director ) HandleResponseReceived (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
265264 response := & Response {
266265 RequestId : reqCtx .Request .Headers [requtil .RequestIdHeaderKey ],
267266 Headers : reqCtx .Response .Headers ,
268267 }
269268
270269 // TODO: to extend fallback functionality, handle cases where target pod is unavailable
271270 // https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1224
272- d .runPostResponsePlugins (ctx , reqCtx .SchedulingRequest , response , reqCtx .TargetPod )
271+ d .runResponseReceivedPlugins (ctx , reqCtx .SchedulingRequest , response , reqCtx .TargetPod )
273272
274273 return reqCtx , nil
275274}
276275
276+ // HandleResponseBodyStreaming is called every time a chunk of the response body is received.
277+ func (d * Director ) HandleResponseBodyStreaming (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
278+ logger := log .FromContext (ctx ).WithValues ("stage" , "bodyChunk" )
279+ logger .V (logutil .TRACE ).Info ("Entering HandleResponseBodyChunk" )
280+ response := & Response {
281+ RequestId : reqCtx .Request .Headers [requtil .RequestIdHeaderKey ],
282+ Headers : reqCtx .Response .Headers ,
283+ }
284+
285+ d .runResponseStreamingPlugins (ctx , reqCtx .SchedulingRequest , response , reqCtx .TargetPod )
286+ logger .V (logutil .TRACE ).Info ("Exiting HandleResponseBodyChunk" )
287+ return reqCtx , nil
288+ }
289+
290+ // HandleResponseBodyComplete is called when the response body is fully received.
291+ func (d * Director ) HandleResponseBodyComplete (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
292+ logger := log .FromContext (ctx ).WithValues ("stage" , "bodyChunk" )
293+ logger .V (logutil .DEBUG ).Info ("Entering HandleResponseBodyComplete" )
294+ response := & Response {
295+ RequestId : reqCtx .Request .Headers [requtil .RequestIdHeaderKey ],
296+ Headers : reqCtx .Response .Headers ,
297+ }
298+
299+ d .runResponseCompletePlugins (ctx , reqCtx .SchedulingRequest , response , reqCtx .TargetPod )
300+
301+ logger .V (logutil .DEBUG ).Info ("Exiting HandleResponseBodyComplete" )
302+ return reqCtx , nil
303+ }
304+
277305func (d * Director ) GetRandomPod () * backend.Pod {
278306 pods := d .datastore .PodList (backendmetrics .AllPodsPredicate )
279307 if len (pods ) == 0 {
@@ -287,22 +315,44 @@ func (d *Director) GetRandomPod() *backend.Pod {
287315func (d * Director ) runPreRequestPlugins (ctx context.Context , request * schedulingtypes.LLMRequest ,
288316 schedulingResult * schedulingtypes.SchedulingResult , targetPort int ) {
289317 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
290- for _ , plugin := range d .preRequestPlugins {
291- loggerDebug .Info ("Running pre-request plugin" , "plugin" , plugin .TypedName ())
318+ for _ , plugin := range d .requestControlPlugins . preRequestPlugins {
319+ loggerDebug .Info ("Running PreRequest plugin" , "plugin" , plugin .TypedName ())
292320 before := time .Now ()
293321 plugin .PreRequest (ctx , request , schedulingResult , targetPort )
294322 metrics .RecordPluginProcessingLatency (PreRequestExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
295- loggerDebug .Info ("Completed running pre-request plugin successfully" , "plugin" , plugin .TypedName ())
323+ loggerDebug .Info ("Completed running PreRequest plugin successfully" , "plugin" , plugin .TypedName ())
324+ }
325+ }
326+
327+ func (d * Director ) runResponseReceivedPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend.Pod ) {
328+ loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
329+ for _ , plugin := range d .requestControlPlugins .responseReceivedPlugins {
330+ loggerDebug .Info ("Running ResponseReceived plugin" , "plugin" , plugin .TypedName ())
331+ before := time .Now ()
332+ plugin .ResponseReceived (ctx , request , response , targetPod )
333+ metrics .RecordPluginProcessingLatency (ResponseReceivedExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
334+ loggerDebug .Info ("Completed running ResponseReceived plugin successfully" , "plugin" , plugin .TypedName ())
335+ }
336+ }
337+
338+ func (d * Director ) runResponseStreamingPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend.Pod ) {
339+ loggerTrace := log .FromContext (ctx ).V (logutil .TRACE )
340+ for _ , plugin := range d .requestControlPlugins .responseStreamingPlugins {
341+ loggerTrace .Info ("Running ResponseStreaming plugin" , "plugin" , plugin .TypedName ())
342+ before := time .Now ()
343+ plugin .ResponseStreaming (ctx , request , response , targetPod )
344+ metrics .RecordPluginProcessingLatency (ResponseStreamingExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
345+ loggerTrace .Info ("Completed running ResponseStreaming plugin successfully" , "plugin" , plugin .TypedName ())
296346 }
297347}
298348
299- func (d * Director ) runPostResponsePlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend.Pod ) {
349+ func (d * Director ) runResponseCompletePlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend.Pod ) {
300350 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
301- for _ , plugin := range d .postResponsePlugins {
302- loggerDebug .Info ("Running post-response plugin" , "plugin" , plugin .TypedName ())
351+ for _ , plugin := range d .requestControlPlugins . responseCompletePlugins {
352+ loggerDebug .Info ("Running ResponseComplete plugin" , "plugin" , plugin .TypedName ())
303353 before := time .Now ()
304- plugin .PostResponse (ctx , request , response , targetPod )
305- metrics .RecordPluginProcessingLatency (PostResponseExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
306- loggerDebug .Info ("Completed running post-response plugin successfully" , "plugin" , plugin .TypedName ())
354+ plugin .ResponseComplete (ctx , request , response , targetPod )
355+ metrics .RecordPluginProcessingLatency (ResponseCompleteExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
356+ loggerDebug .Info ("Completed running ResponseComplete plugin successfully" , "plugin" , plugin .TypedName ())
307357 }
308358}
0 commit comments