@@ -20,6 +20,8 @@ import (
2020 "context"
2121 "fmt"
2222 "math"
23+ "slices"
24+ "strings"
2325
2426 v1 "k8s.io/api/core/v1"
2527 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -30,13 +32,15 @@ import (
3032 "k8s.io/kubernetes/pkg/scheduler/framework"
3133
3234 llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
35+ llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
3336)
3437
3538const (
3639 Name = "ResourceFungibility"
3740 stateKey = Name
3841
39- modelNameLabelKey = llmazcoreapi .ModelNameLabelKey
42+ modelNameLabelKey = llmazcoreapi .ModelNameLabelKey
43+ inferenceServiceFlavorsAnnoKey = llmazinferenceapi .InferenceServiceFlavorsAnnoKey
4044)
4145
4246var (
@@ -151,7 +155,24 @@ func (rf *ResourceFungibility) calPreFilterState(ctx context.Context, pod *v1.Po
151155 return nil
152156 }
153157
154- for _ , f := range model .Spec .InferenceConfig .Flavors {
158+ // By default, all flavors configured in the model will be used. But if the given annotation is set,
159+ // it means that the inference service overrides the default value with a subset of the model's flavors
160+ // and the scheduler should respect the order of flavors configured in the annotation.
161+ serviceFlavors := model .Spec .InferenceConfig .Flavors
162+ if v , ok := pod .Annotations [inferenceServiceFlavorsAnnoKey ]; ok {
163+ serviceFlavors = nil
164+ for _ , flavorName := range strings .Split (v , "," ) {
165+ idx := slices .IndexFunc (model .Spec .InferenceConfig .Flavors , func (f llmazcoreapi.Flavor ) bool {
166+ return string (f .Name ) == flavorName
167+ })
168+ if idx == - 1 {
169+ return fmt .Errorf ("flavor %q not found in model %q" , flavorName , modelName )
170+ }
171+ serviceFlavors = append (serviceFlavors , model .Spec .InferenceConfig .Flavors [idx ])
172+ }
173+ }
174+
175+ for _ , f := range serviceFlavors {
155176 if len (f .NodeSelector ) == 0 {
156177 // Once nodeSelector is empty, which means all nodes are potential candidates,
157178 // so we'll skip the Filter stage.
0 commit comments