From c749e2dc3bee0d5aace0b0d8e584767021a8dd89 Mon Sep 17 00:00:00 2001 From: bobzetian Date: Wed, 5 Nov 2025 02:32:33 +0000 Subject: [PATCH] Add inferenceomodelrewrite api. --- apix/v1alpha2/inferencemodelrewrite_types.go | 195 ++++++++++++++ apix/v1alpha2/inferenceobjective_types.go | 21 -- apix/v1alpha2/shared_types.go | 21 ++ apix/v1alpha2/zz_generated.deepcopy.go | 186 ++++++++++++++ apix/v1alpha2/zz_generated.register.go | 2 + .../apix/v1alpha2/inferencemodelrewrite.go | 242 ++++++++++++++++++ .../v1alpha2/inferencemodelrewriterule.go | 58 +++++ .../v1alpha2/inferencemodelrewritespec.go | 53 ++++ .../v1alpha2/inferencemodelrewritestatus.go | 48 ++++ .../applyconfiguration/apix/v1alpha2/match.go | 39 +++ .../apix/v1alpha2/modelmatch.go | 52 ++++ .../apix/v1alpha2/targetmodel.go | 48 ++++ client-go/applyconfiguration/utils.go | 14 + .../typed/apix/v1alpha2/apix_client.go | 5 + .../apix/v1alpha2/fake/fake_apix_client.go | 4 + .../fake/fake_inferencemodelrewrite.go | 53 ++++ .../apix/v1alpha2/generated_expansion.go | 2 + .../apix/v1alpha2/inferencemodelrewrite.go | 74 ++++++ .../apix/v1alpha2/inferencemodelrewrite.go | 102 ++++++++ .../apix/v1alpha2/interface.go | 7 + .../informers/externalversions/generic.go | 2 + .../apix/v1alpha2/expansion_generated.go | 8 + .../apix/v1alpha2/inferencemodelrewrite.go | 70 +++++ ...rking.x-k8s.io_inferencemodelrewrites.yaml | 236 +++++++++++++++++ .../1816-inferenceomodelrewrite/README.md | 235 +++++++++++++++++ 25 files changed, 1756 insertions(+), 21 deletions(-) create mode 100644 apix/v1alpha2/inferencemodelrewrite_types.go create mode 100644 client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewrite.go create mode 100644 client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewriterule.go create mode 100644 client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritespec.go create mode 100644 client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritestatus.go create mode 100644 
client-go/applyconfiguration/apix/v1alpha2/match.go create mode 100644 client-go/applyconfiguration/apix/v1alpha2/modelmatch.go create mode 100644 client-go/applyconfiguration/apix/v1alpha2/targetmodel.go create mode 100644 client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_inferencemodelrewrite.go create mode 100644 client-go/clientset/versioned/typed/apix/v1alpha2/inferencemodelrewrite.go create mode 100644 client-go/informers/externalversions/apix/v1alpha2/inferencemodelrewrite.go create mode 100644 client-go/listers/apix/v1alpha2/inferencemodelrewrite.go create mode 100644 config/crd/bases/inference.networking.x-k8s.io_inferencemodelrewrites.yaml create mode 100644 docs/proposals/1816-inferenceomodelrewrite/README.md diff --git a/apix/v1alpha2/inferencemodelrewrite_types.go b/apix/v1alpha2/inferencemodelrewrite_types.go new file mode 100644 index 000000000..b04b3f6f2 --- /dev/null +++ b/apix/v1alpha2/inferencemodelrewrite_types.go @@ -0,0 +1,195 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha2 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// InferenceModelRewrite is the Schema for the InferenceModelRewrite API. 
+// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` +// +genclient +type InferenceModelRewrite struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec InferenceModelRewriteSpec `json:"spec,omitempty"` + Status InferenceModelRewriteStatus `json:"status,omitempty"` +} + +// InferenceModelRewriteList contains a list of InferenceModelRewrite. +// +// +kubebuilder:object:root=true +type InferenceModelRewriteList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []InferenceModelRewrite `json:"items"` +} + +// InferenceModelRewriteSpec defines the desired state of InferenceModelRewrite. +type InferenceModelRewriteSpec struct { + // PoolRef is a reference to the inference pool. + // +kubebuilder:validation:Required + PoolRef PoolObjectReference `json:"poolRef"` + + // Rules are the ordered set of rules for rewriting inference requests. + // The first rule to match a request will be used. + // + // --- Precedence and Conflict Resolution --- + // If multiple InferenceModelRewrite resources target the same + // InferencePool, the controller will merge them based on precedence. + // + // **Timestamp Wins:** If rules from two different InferenceModelRewrite + // resources both match a request, the rule from the *oldest* + // InferenceModelRewrite resource (determined by + // metadata.creationTimestamp) will be used. + // + // +required + Rules []InferenceModelRewriteRule `json:"rules"` +} + +// InferenceModelRewriteRule defines the match criteria and corresponding action. +// +// A specific model name can only be matched by one rule across all +// policies attached to the same InferencePool. 
If multiple policies attempt +// to match the same model name, the oldest policy (by creationTimestamp) +// will be the only one considered valid. Newer policies with conflicting +// matches will be marked as invalid in their status. +type InferenceModelRewriteRule struct { + // Matches defines the criteria for matching a request. + // If multiple match criteria are specified, a request matches if + // ANY of the criteria are satisfied (logical OR). + // If empty, the rule matches all requests. + // + // +optional + Matches []Match `json:"matches,omitempty"` + + // --- Actions --- + // Targets defines how to distribute traffic across a set of + // weighted model targets. This is used for traffic splitting, A/B tests, + // or canary rollouts. + // +optional + // +kubebuilder:validation:MinItems=1 + // + Targets []TargetModel `json:"split,omitempty"` +} + +// TargetModel defines a weighted model destination for traffic distribution. +type TargetModel struct { + // (The following comment is copied from the original targetModel) + // Weight is used to determine the proportion of traffic that should be + // sent to this model when multiple target models are specified. + // + // Weight defines the proportion of requests forwarded to the specified + // model. This is computed as weight/(sum of all weights in this + // TargetModels list). For non-zero values, there may be some epsilon from + // the exact proportion defined here depending on the precision an + // implementation supports. Weight is not a percentage and the sum of + // weights does not need to equal 100. + // + // If a weight is set for any targetModel, it must be set for all targetModels. + // Conversely weights are optional, so long as ALL targetModels do not specify a weight. + // + // +optional + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=1000000 + Weight int32 `json:"weight,omitempty"` + + // --- Destination Types --- + // ModelRewrite specifies a static model name destination. 
+ // +optional + ModelRewrite string `json:"modelRewrite"` +} + +// Match defines the criteria for matching the LLM requests. +type Match struct { + // Model specifies the criteria for matching the 'model' field + // within the JSON request body. + // +required + Model *ModelMatch `json:"model,omitempty"` +} + +// ModelMatch defines how to match against the model name in the request body. +type ModelMatch struct { + // Type specifies the kind of string matching to use. + // Supported value is "Exact". Defaults to "Exact". + // +optional + // +kubebuilder:default=Exact + Type *MatchValidationType `json:"type,omitempty"` + + // Value is the model name string to match against. + // +required + // +kubebuilder:validation:MinLength=1 + Value string `json:"value"` +} + +// MatchValidationType specifies the type of string matching to use. +// +kubebuilder:validation:Enum=Exact +type MatchValidationType string + +const ( + // MatchExact indicates that the model name must match exactly. + MatchExact MatchValidationType = "Exact" +) + +// InferenceModelRewriteStatus defines the observed state of InferenceModelRewrite. +type InferenceModelRewriteStatus struct { + // Conditions track the state of the InferenceModelRewrite. + // + // Known condition types are: + // + // * "Accepted" + // + // +optional + // +listType=map + // +listMapKey=type + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// InferenceModelRewriteConditionType is a type of condition for the InferenceModelRewrite. +type InferenceModelRewriteConditionType string + +// InferenceModelRewriteConditionReason is the reason for a given InferenceModelRewriteConditionType. 
+type InferenceModelRewriteConditionReason string + +const ( + // RewriteConditionAccepted indicates if the rewrite policy is accepted, and if not, why. + // This is the primary condition for this resource. + // + // Possible reasons for this condition to be True are: + // + // * "Accepted" + // + // Possible reasons for this condition to be Unknown are: + // + // * "Pending" + // + RewriteConditionAccepted InferenceModelRewriteConditionType = "Accepted" + + // RewriteReasonAccepted indicates the policy is valid, non-conflicting, + // and has been successfully applied to the inference pool. + RewriteReasonAccepted InferenceModelRewriteConditionReason = "Accepted" + + // RewriteReasonPending is the initial state, and indicates that the + // controller has not yet reconciled the InferenceModelRewrite. + RewriteReasonPending InferenceModelRewriteConditionReason = "Pending" +) diff --git a/apix/v1alpha2/inferenceobjective_types.go b/apix/v1alpha2/inferenceobjective_types.go index 691b8819e..ecd50b30d 100644 --- a/apix/v1alpha2/inferenceobjective_types.go +++ b/apix/v1alpha2/inferenceobjective_types.go @@ -78,27 +78,6 @@ type InferenceObjectiveSpec struct { PoolRef PoolObjectReference `json:"poolRef"` } -// PoolObjectReference identifies an API object within the namespace of the -// referrer. -type PoolObjectReference struct { - // Group is the group of the referent. - // - // +optional - // +kubebuilder:default="inference.networking.k8s.io" - Group Group `json:"group,omitempty"` - - // Kind is kind of the referent. For example "InferencePool". - // - // +optional - // +kubebuilder:default="InferencePool" - Kind Kind `json:"kind,omitempty"` - - // Name is the name of the referent. - // - // +kubebuilder:validation:Required - Name ObjectName `json:"name"` -} - // InferenceObjectiveStatus defines the observed state of InferenceObjective type InferenceObjectiveStatus struct { // Conditions track the state of the InferenceObjective. 
diff --git a/apix/v1alpha2/shared_types.go b/apix/v1alpha2/shared_types.go index 1fe46ddae..4fc2c7782 100644 --- a/apix/v1alpha2/shared_types.go +++ b/apix/v1alpha2/shared_types.go @@ -127,3 +127,24 @@ type LabelKey string // +kubebuilder:validation:MaxLength=63 // +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$` type LabelValue string + +// PoolObjectReference identifies an API object within the namespace of the +// referrer. +type PoolObjectReference struct { + // Group is the group of the referent. + // + // +optional + // +kubebuilder:default="inference.networking.k8s.io" + Group Group `json:"group,omitempty"` + + // Kind is kind of the referent. For example "InferencePool". + // + // +optional + // +kubebuilder:default="InferencePool" + Kind Kind `json:"kind,omitempty"` + + // Name is the name of the referent. + // + // +kubebuilder:validation:Required + Name ObjectName `json:"name"` +} diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index 0249d442d..2f7dca28d 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -60,6 +60,137 @@ func (in *Extension) DeepCopy() *Extension { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceModelRewrite) DeepCopyInto(out *InferenceModelRewrite) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelRewrite. +func (in *InferenceModelRewrite) DeepCopy() *InferenceModelRewrite { + if in == nil { + return nil + } + out := new(InferenceModelRewrite) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *InferenceModelRewrite) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceModelRewriteList) DeepCopyInto(out *InferenceModelRewriteList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]InferenceModelRewrite, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelRewriteList. +func (in *InferenceModelRewriteList) DeepCopy() *InferenceModelRewriteList { + if in == nil { + return nil + } + out := new(InferenceModelRewriteList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *InferenceModelRewriteList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceModelRewriteRule) DeepCopyInto(out *InferenceModelRewriteRule) { + *out = *in + if in.Matches != nil { + in, out := &in.Matches, &out.Matches + *out = make([]Match, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Targets != nil { + in, out := &in.Targets, &out.Targets + *out = make([]TargetModel, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelRewriteRule. 
+func (in *InferenceModelRewriteRule) DeepCopy() *InferenceModelRewriteRule { + if in == nil { + return nil + } + out := new(InferenceModelRewriteRule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceModelRewriteSpec) DeepCopyInto(out *InferenceModelRewriteSpec) { + *out = *in + out.PoolRef = in.PoolRef + if in.Rules != nil { + in, out := &in.Rules, &out.Rules + *out = make([]InferenceModelRewriteRule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelRewriteSpec. +func (in *InferenceModelRewriteSpec) DeepCopy() *InferenceModelRewriteSpec { + if in == nil { + return nil + } + out := new(InferenceModelRewriteSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceModelRewriteStatus) DeepCopyInto(out *InferenceModelRewriteStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelRewriteStatus. +func (in *InferenceModelRewriteStatus) DeepCopy() *InferenceModelRewriteStatus { + if in == nil { + return nil + } + out := new(InferenceModelRewriteStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *InferenceObjective) DeepCopyInto(out *InferenceObjective) { *out = *in @@ -266,6 +397,46 @@ func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Match) DeepCopyInto(out *Match) { + *out = *in + if in.Model != nil { + in, out := &in.Model, &out.Model + *out = new(ModelMatch) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Match. +func (in *Match) DeepCopy() *Match { + if in == nil { + return nil + } + out := new(Match) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelMatch) DeepCopyInto(out *ModelMatch) { + *out = *in + if in.Type != nil { + in, out := &in.Type, &out.Type + *out = new(MatchValidationType) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelMatch. +func (in *ModelMatch) DeepCopy() *ModelMatch { + if in == nil { + return nil + } + out := new(ModelMatch) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ParentGatewayReference) DeepCopyInto(out *ParentGatewayReference) { *out = *in @@ -333,3 +504,18 @@ func (in *PoolStatus) DeepCopy() *PoolStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TargetModel) DeepCopyInto(out *TargetModel) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetModel. 
+func (in *TargetModel) DeepCopy() *TargetModel { + if in == nil { + return nil + } + out := new(TargetModel) + in.DeepCopyInto(out) + return out +} diff --git a/apix/v1alpha2/zz_generated.register.go b/apix/v1alpha2/zz_generated.register.go index ae0e1dabe..42e569571 100644 --- a/apix/v1alpha2/zz_generated.register.go +++ b/apix/v1alpha2/zz_generated.register.go @@ -61,6 +61,8 @@ func init() { // Adds the list of known types to Scheme. func addKnownTypes(scheme *runtime.Scheme) error { scheme.AddKnownTypes(SchemeGroupVersion, + &InferenceModelRewrite{}, + &InferenceModelRewriteList{}, &InferenceObjective{}, &InferenceObjectiveList{}, &InferencePool{}, diff --git a/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewrite.go b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewrite.go new file mode 100644 index 000000000..506e7957a --- /dev/null +++ b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewrite.go @@ -0,0 +1,242 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + v1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// InferenceModelRewriteApplyConfiguration represents a declarative configuration of the InferenceModelRewrite type for use +// with apply. 
+type InferenceModelRewriteApplyConfiguration struct { + v1.TypeMetaApplyConfiguration `json:",inline"` + *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` + Spec *InferenceModelRewriteSpecApplyConfiguration `json:"spec,omitempty"` + Status *InferenceModelRewriteStatusApplyConfiguration `json:"status,omitempty"` +} + +// InferenceModelRewrite constructs a declarative configuration of the InferenceModelRewrite type for use with +// apply. +func InferenceModelRewrite(name, namespace string) *InferenceModelRewriteApplyConfiguration { + b := &InferenceModelRewriteApplyConfiguration{} + b.WithName(name) + b.WithNamespace(namespace) + b.WithKind("InferenceModelRewrite") + b.WithAPIVersion("inference.networking.x-k8s.io/v1alpha2") + return b +} +func (b InferenceModelRewriteApplyConfiguration) IsApplyConfiguration() {} + +// WithKind sets the Kind field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Kind field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithKind(value string) *InferenceModelRewriteApplyConfiguration { + b.TypeMetaApplyConfiguration.Kind = &value + return b +} + +// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the APIVersion field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithAPIVersion(value string) *InferenceModelRewriteApplyConfiguration { + b.TypeMetaApplyConfiguration.APIVersion = &value + return b +} + +// WithName sets the Name field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
+// If called multiple times, the Name field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithName(value string) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.Name = &value + return b +} + +// WithGenerateName sets the GenerateName field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the GenerateName field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithGenerateName(value string) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.GenerateName = &value + return b +} + +// WithNamespace sets the Namespace field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Namespace field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithNamespace(value string) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.Namespace = &value + return b +} + +// WithUID sets the UID field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the UID field is set to the value of the last call. 
+func (b *InferenceModelRewriteApplyConfiguration) WithUID(value types.UID) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.UID = &value + return b +} + +// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ResourceVersion field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithResourceVersion(value string) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.ResourceVersion = &value + return b +} + +// WithGeneration sets the Generation field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Generation field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithGeneration(value int64) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.Generation = &value + return b +} + +// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the CreationTimestamp field is set to the value of the last call. 
+func (b *InferenceModelRewriteApplyConfiguration) WithCreationTimestamp(value metav1.Time) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.CreationTimestamp = &value + return b +} + +// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the DeletionTimestamp field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value + return b +} + +// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value + return b +} + +// WithLabels puts the entries into the Labels field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the Labels field, +// overwriting an existing map entries in Labels field with the same key. 
+func (b *InferenceModelRewriteApplyConfiguration) WithLabels(entries map[string]string) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.ObjectMetaApplyConfiguration.Labels[k] = v + } + return b +} + +// WithAnnotations puts the entries into the Annotations field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the Annotations field, +// overwriting an existing map entries in Annotations field with the same key. +func (b *InferenceModelRewriteApplyConfiguration) WithAnnotations(entries map[string]string) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.ObjectMetaApplyConfiguration.Annotations[k] = v + } + return b +} + +// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the OwnerReferences field. 
+func (b *InferenceModelRewriteApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + for i := range values { + if values[i] == nil { + panic("nil value passed to WithOwnerReferences") + } + b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i]) + } + return b +} + +// WithFinalizers adds the given value to the Finalizers field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Finalizers field. +func (b *InferenceModelRewriteApplyConfiguration) WithFinalizers(values ...string) *InferenceModelRewriteApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + for i := range values { + b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i]) + } + return b +} + +func (b *InferenceModelRewriteApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { + if b.ObjectMetaApplyConfiguration == nil { + b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} + } +} + +// WithSpec sets the Spec field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Spec field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithSpec(value *InferenceModelRewriteSpecApplyConfiguration) *InferenceModelRewriteApplyConfiguration { + b.Spec = value + return b +} + +// WithStatus sets the Status field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
+// If called multiple times, the Status field is set to the value of the last call. +func (b *InferenceModelRewriteApplyConfiguration) WithStatus(value *InferenceModelRewriteStatusApplyConfiguration) *InferenceModelRewriteApplyConfiguration { + b.Status = value + return b +} + +// GetKind retrieves the value of the Kind field in the declarative configuration. +func (b *InferenceModelRewriteApplyConfiguration) GetKind() *string { + return b.TypeMetaApplyConfiguration.Kind +} + +// GetAPIVersion retrieves the value of the APIVersion field in the declarative configuration. +func (b *InferenceModelRewriteApplyConfiguration) GetAPIVersion() *string { + return b.TypeMetaApplyConfiguration.APIVersion +} + +// GetName retrieves the value of the Name field in the declarative configuration. +func (b *InferenceModelRewriteApplyConfiguration) GetName() *string { + b.ensureObjectMetaApplyConfigurationExists() + return b.ObjectMetaApplyConfiguration.Name +} + +// GetNamespace retrieves the value of the Namespace field in the declarative configuration. +func (b *InferenceModelRewriteApplyConfiguration) GetNamespace() *string { + b.ensureObjectMetaApplyConfigurationExists() + return b.ObjectMetaApplyConfiguration.Namespace +} diff --git a/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewriterule.go b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewriterule.go new file mode 100644 index 000000000..7e03c192d --- /dev/null +++ b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewriterule.go @@ -0,0 +1,58 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha2 + +// InferenceModelRewriteRuleApplyConfiguration represents a declarative configuration of the InferenceModelRewriteRule type for use +// with apply. +type InferenceModelRewriteRuleApplyConfiguration struct { + Matches []MatchApplyConfiguration `json:"matches,omitempty"` + Targets []TargetModelApplyConfiguration `json:"split,omitempty"` +} + +// InferenceModelRewriteRuleApplyConfiguration constructs a declarative configuration of the InferenceModelRewriteRule type for use with +// apply. +func InferenceModelRewriteRule() *InferenceModelRewriteRuleApplyConfiguration { + return &InferenceModelRewriteRuleApplyConfiguration{} +} + +// WithMatches adds the given value to the Matches field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Matches field. +func (b *InferenceModelRewriteRuleApplyConfiguration) WithMatches(values ...*MatchApplyConfiguration) *InferenceModelRewriteRuleApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithMatches") + } + b.Matches = append(b.Matches, *values[i]) + } + return b +} + +// WithTargets adds the given value to the Targets field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. 
+// If called multiple times, values provided by each call will be appended to the Targets field. +func (b *InferenceModelRewriteRuleApplyConfiguration) WithTargets(values ...*TargetModelApplyConfiguration) *InferenceModelRewriteRuleApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithTargets") + } + b.Targets = append(b.Targets, *values[i]) + } + return b +} diff --git a/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritespec.go b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritespec.go new file mode 100644 index 000000000..018223417 --- /dev/null +++ b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritespec.go @@ -0,0 +1,53 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha2 + +// InferenceModelRewriteSpecApplyConfiguration represents a declarative configuration of the InferenceModelRewriteSpec type for use +// with apply. +type InferenceModelRewriteSpecApplyConfiguration struct { + PoolRef *PoolObjectReferenceApplyConfiguration `json:"poolRef,omitempty"` + Rules []InferenceModelRewriteRuleApplyConfiguration `json:"rules,omitempty"` +} + +// InferenceModelRewriteSpecApplyConfiguration constructs a declarative configuration of the InferenceModelRewriteSpec type for use with +// apply. 
+func InferenceModelRewriteSpec() *InferenceModelRewriteSpecApplyConfiguration { + return &InferenceModelRewriteSpecApplyConfiguration{} +} + +// WithPoolRef sets the PoolRef field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the PoolRef field is set to the value of the last call. +func (b *InferenceModelRewriteSpecApplyConfiguration) WithPoolRef(value *PoolObjectReferenceApplyConfiguration) *InferenceModelRewriteSpecApplyConfiguration { + b.PoolRef = value + return b +} + +// WithRules adds the given value to the Rules field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Rules field. +func (b *InferenceModelRewriteSpecApplyConfiguration) WithRules(values ...*InferenceModelRewriteRuleApplyConfiguration) *InferenceModelRewriteSpecApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithRules") + } + b.Rules = append(b.Rules, *values[i]) + } + return b +} diff --git a/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritestatus.go b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritestatus.go new file mode 100644 index 000000000..8004c5316 --- /dev/null +++ b/client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewritestatus.go @@ -0,0 +1,48 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + v1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// InferenceModelRewriteStatusApplyConfiguration represents a declarative configuration of the InferenceModelRewriteStatus type for use +// with apply. +type InferenceModelRewriteStatusApplyConfiguration struct { + Conditions []v1.ConditionApplyConfiguration `json:"conditions,omitempty"` +} + +// InferenceModelRewriteStatusApplyConfiguration constructs a declarative configuration of the InferenceModelRewriteStatus type for use with +// apply. +func InferenceModelRewriteStatus() *InferenceModelRewriteStatusApplyConfiguration { + return &InferenceModelRewriteStatusApplyConfiguration{} +} + +// WithConditions adds the given value to the Conditions field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Conditions field. 
+func (b *InferenceModelRewriteStatusApplyConfiguration) WithConditions(values ...*v1.ConditionApplyConfiguration) *InferenceModelRewriteStatusApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithConditions") + } + b.Conditions = append(b.Conditions, *values[i]) + } + return b +} diff --git a/client-go/applyconfiguration/apix/v1alpha2/match.go b/client-go/applyconfiguration/apix/v1alpha2/match.go new file mode 100644 index 000000000..c0eca17f7 --- /dev/null +++ b/client-go/applyconfiguration/apix/v1alpha2/match.go @@ -0,0 +1,39 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha2 + +// MatchApplyConfiguration represents a declarative configuration of the Match type for use +// with apply. +type MatchApplyConfiguration struct { + Model *ModelMatchApplyConfiguration `json:"model,omitempty"` +} + +// MatchApplyConfiguration constructs a declarative configuration of the Match type for use with +// apply. +func Match() *MatchApplyConfiguration { + return &MatchApplyConfiguration{} +} + +// WithModel sets the Model field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Model field is set to the value of the last call. 
+func (b *MatchApplyConfiguration) WithModel(value *ModelMatchApplyConfiguration) *MatchApplyConfiguration { + b.Model = value + return b +} diff --git a/client-go/applyconfiguration/apix/v1alpha2/modelmatch.go b/client-go/applyconfiguration/apix/v1alpha2/modelmatch.go new file mode 100644 index 000000000..2f26d410b --- /dev/null +++ b/client-go/applyconfiguration/apix/v1alpha2/modelmatch.go @@ -0,0 +1,52 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + apixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" +) + +// ModelMatchApplyConfiguration represents a declarative configuration of the ModelMatch type for use +// with apply. +type ModelMatchApplyConfiguration struct { + Type *apixv1alpha2.MatchValidationType `json:"type,omitempty"` + Value *string `json:"value,omitempty"` +} + +// ModelMatchApplyConfiguration constructs a declarative configuration of the ModelMatch type for use with +// apply. +func ModelMatch() *ModelMatchApplyConfiguration { + return &ModelMatchApplyConfiguration{} +} + +// WithType sets the Type field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Type field is set to the value of the last call. 
+func (b *ModelMatchApplyConfiguration) WithType(value apixv1alpha2.MatchValidationType) *ModelMatchApplyConfiguration { + b.Type = &value + return b +} + +// WithValue sets the Value field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Value field is set to the value of the last call. +func (b *ModelMatchApplyConfiguration) WithValue(value string) *ModelMatchApplyConfiguration { + b.Value = &value + return b +} diff --git a/client-go/applyconfiguration/apix/v1alpha2/targetmodel.go b/client-go/applyconfiguration/apix/v1alpha2/targetmodel.go new file mode 100644 index 000000000..4125a3223 --- /dev/null +++ b/client-go/applyconfiguration/apix/v1alpha2/targetmodel.go @@ -0,0 +1,48 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha2 + +// TargetModelApplyConfiguration represents a declarative configuration of the TargetModel type for use +// with apply. +type TargetModelApplyConfiguration struct { + Weight *int32 `json:"weight,omitempty"` + ModelRewrite *string `json:"modelRewrite,omitempty"` +} + +// TargetModelApplyConfiguration constructs a declarative configuration of the TargetModel type for use with +// apply. 
+func TargetModel() *TargetModelApplyConfiguration { + return &TargetModelApplyConfiguration{} +} + +// WithWeight sets the Weight field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Weight field is set to the value of the last call. +func (b *TargetModelApplyConfiguration) WithWeight(value int32) *TargetModelApplyConfiguration { + b.Weight = &value + return b +} + +// WithModelRewrite sets the ModelRewrite field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ModelRewrite field is set to the value of the last call. +func (b *TargetModelApplyConfiguration) WithModelRewrite(value string) *TargetModelApplyConfiguration { + b.ModelRewrite = &value + return b +} diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index 7e4ea0915..93d50e54b 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -66,6 +66,14 @@ func ForKind(kind schema.GroupVersionKind) interface{} { // Group=inference.networking.x-k8s.io, Version=v1alpha2 case v1alpha2.SchemeGroupVersion.WithKind("Extension"): return &apixv1alpha2.ExtensionApplyConfiguration{} + case v1alpha2.SchemeGroupVersion.WithKind("InferenceModelRewrite"): + return &apixv1alpha2.InferenceModelRewriteApplyConfiguration{} + case v1alpha2.SchemeGroupVersion.WithKind("InferenceModelRewriteRule"): + return &apixv1alpha2.InferenceModelRewriteRuleApplyConfiguration{} + case v1alpha2.SchemeGroupVersion.WithKind("InferenceModelRewriteSpec"): + return &apixv1alpha2.InferenceModelRewriteSpecApplyConfiguration{} + case v1alpha2.SchemeGroupVersion.WithKind("InferenceModelRewriteStatus"): + return &apixv1alpha2.InferenceModelRewriteStatusApplyConfiguration{} case 
v1alpha2.SchemeGroupVersion.WithKind("InferenceObjective"): return &apixv1alpha2.InferenceObjectiveApplyConfiguration{} case v1alpha2.SchemeGroupVersion.WithKind("InferenceObjectiveSpec"): @@ -78,12 +86,18 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &apixv1alpha2.InferencePoolSpecApplyConfiguration{} case v1alpha2.SchemeGroupVersion.WithKind("InferencePoolStatus"): return &apixv1alpha2.InferencePoolStatusApplyConfiguration{} + case v1alpha2.SchemeGroupVersion.WithKind("Match"): + return &apixv1alpha2.MatchApplyConfiguration{} + case v1alpha2.SchemeGroupVersion.WithKind("ModelMatch"): + return &apixv1alpha2.ModelMatchApplyConfiguration{} case v1alpha2.SchemeGroupVersion.WithKind("ParentGatewayReference"): return &apixv1alpha2.ParentGatewayReferenceApplyConfiguration{} case v1alpha2.SchemeGroupVersion.WithKind("PoolObjectReference"): return &apixv1alpha2.PoolObjectReferenceApplyConfiguration{} case v1alpha2.SchemeGroupVersion.WithKind("PoolStatus"): return &apixv1alpha2.PoolStatusApplyConfiguration{} + case v1alpha2.SchemeGroupVersion.WithKind("TargetModel"): + return &apixv1alpha2.TargetModelApplyConfiguration{} } return nil diff --git a/client-go/clientset/versioned/typed/apix/v1alpha2/apix_client.go b/client-go/clientset/versioned/typed/apix/v1alpha2/apix_client.go index 84f636fb4..8d21b3c8d 100644 --- a/client-go/clientset/versioned/typed/apix/v1alpha2/apix_client.go +++ b/client-go/clientset/versioned/typed/apix/v1alpha2/apix_client.go @@ -28,6 +28,7 @@ import ( type XInferenceV1alpha2Interface interface { RESTClient() rest.Interface + InferenceModelRewritesGetter InferenceObjectivesGetter InferencePoolsGetter } @@ -37,6 +38,10 @@ type XInferenceV1alpha2Client struct { restClient rest.Interface } +func (c *XInferenceV1alpha2Client) InferenceModelRewrites(namespace string) InferenceModelRewriteInterface { + return newInferenceModelRewrites(c, namespace) +} + func (c *XInferenceV1alpha2Client) InferenceObjectives(namespace string) 
InferenceObjectiveInterface { return newInferenceObjectives(c, namespace) } diff --git a/client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_apix_client.go b/client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_apix_client.go index 54a5b047c..c0b2b2039 100644 --- a/client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_apix_client.go +++ b/client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_apix_client.go @@ -28,6 +28,10 @@ type FakeXInferenceV1alpha2 struct { *testing.Fake } +func (c *FakeXInferenceV1alpha2) InferenceModelRewrites(namespace string) v1alpha2.InferenceModelRewriteInterface { + return newFakeInferenceModelRewrites(c, namespace) +} + func (c *FakeXInferenceV1alpha2) InferenceObjectives(namespace string) v1alpha2.InferenceObjectiveInterface { return newFakeInferenceObjectives(c, namespace) } diff --git a/client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_inferencemodelrewrite.go b/client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_inferencemodelrewrite.go new file mode 100644 index 000000000..43f93c8a8 --- /dev/null +++ b/client-go/clientset/versioned/typed/apix/v1alpha2/fake/fake_inferencemodelrewrite.go @@ -0,0 +1,53 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + gentype "k8s.io/client-go/gentype" + v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + apixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/apix/v1alpha2" + typedapixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/apix/v1alpha2" +) + +// fakeInferenceModelRewrites implements InferenceModelRewriteInterface +type fakeInferenceModelRewrites struct { + *gentype.FakeClientWithListAndApply[*v1alpha2.InferenceModelRewrite, *v1alpha2.InferenceModelRewriteList, *apixv1alpha2.InferenceModelRewriteApplyConfiguration] + Fake *FakeXInferenceV1alpha2 +} + +func newFakeInferenceModelRewrites(fake *FakeXInferenceV1alpha2, namespace string) typedapixv1alpha2.InferenceModelRewriteInterface { + return &fakeInferenceModelRewrites{ + gentype.NewFakeClientWithListAndApply[*v1alpha2.InferenceModelRewrite, *v1alpha2.InferenceModelRewriteList, *apixv1alpha2.InferenceModelRewriteApplyConfiguration]( + fake.Fake, + namespace, + v1alpha2.SchemeGroupVersion.WithResource("inferencemodelrewrites"), + v1alpha2.SchemeGroupVersion.WithKind("InferenceModelRewrite"), + func() *v1alpha2.InferenceModelRewrite { return &v1alpha2.InferenceModelRewrite{} }, + func() *v1alpha2.InferenceModelRewriteList { return &v1alpha2.InferenceModelRewriteList{} }, + func(dst, src *v1alpha2.InferenceModelRewriteList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha2.InferenceModelRewriteList) []*v1alpha2.InferenceModelRewrite { + return gentype.ToPointerSlice(list.Items) + }, + func(list *v1alpha2.InferenceModelRewriteList, items []*v1alpha2.InferenceModelRewrite) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, + } +} diff --git a/client-go/clientset/versioned/typed/apix/v1alpha2/generated_expansion.go b/client-go/clientset/versioned/typed/apix/v1alpha2/generated_expansion.go index b85a89d83..dfd94b699 100644 --- 
a/client-go/clientset/versioned/typed/apix/v1alpha2/generated_expansion.go +++ b/client-go/clientset/versioned/typed/apix/v1alpha2/generated_expansion.go @@ -18,6 +18,8 @@ limitations under the License. package v1alpha2 +type InferenceModelRewriteExpansion interface{} + type InferenceObjectiveExpansion interface{} type InferencePoolExpansion interface{} diff --git a/client-go/clientset/versioned/typed/apix/v1alpha2/inferencemodelrewrite.go b/client-go/clientset/versioned/typed/apix/v1alpha2/inferencemodelrewrite.go new file mode 100644 index 000000000..0fac36185 --- /dev/null +++ b/client-go/clientset/versioned/typed/apix/v1alpha2/inferencemodelrewrite.go @@ -0,0 +1,74 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + context "context" + + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" + apixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + applyconfigurationapixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/apix/v1alpha2" + scheme "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/scheme" +) + +// InferenceModelRewritesGetter has a method to return a InferenceModelRewriteInterface. +// A group's client should implement this interface. 
+type InferenceModelRewritesGetter interface { + InferenceModelRewrites(namespace string) InferenceModelRewriteInterface +} + +// InferenceModelRewriteInterface has methods to work with InferenceModelRewrite resources. +type InferenceModelRewriteInterface interface { + Create(ctx context.Context, inferenceModelRewrite *apixv1alpha2.InferenceModelRewrite, opts v1.CreateOptions) (*apixv1alpha2.InferenceModelRewrite, error) + Update(ctx context.Context, inferenceModelRewrite *apixv1alpha2.InferenceModelRewrite, opts v1.UpdateOptions) (*apixv1alpha2.InferenceModelRewrite, error) + // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). + UpdateStatus(ctx context.Context, inferenceModelRewrite *apixv1alpha2.InferenceModelRewrite, opts v1.UpdateOptions) (*apixv1alpha2.InferenceModelRewrite, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*apixv1alpha2.InferenceModelRewrite, error) + List(ctx context.Context, opts v1.ListOptions) (*apixv1alpha2.InferenceModelRewriteList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apixv1alpha2.InferenceModelRewrite, err error) + Apply(ctx context.Context, inferenceModelRewrite *applyconfigurationapixv1alpha2.InferenceModelRewriteApplyConfiguration, opts v1.ApplyOptions) (result *apixv1alpha2.InferenceModelRewrite, err error) + // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). 
+ ApplyStatus(ctx context.Context, inferenceModelRewrite *applyconfigurationapixv1alpha2.InferenceModelRewriteApplyConfiguration, opts v1.ApplyOptions) (result *apixv1alpha2.InferenceModelRewrite, err error) + InferenceModelRewriteExpansion +} + +// inferenceModelRewrites implements InferenceModelRewriteInterface +type inferenceModelRewrites struct { + *gentype.ClientWithListAndApply[*apixv1alpha2.InferenceModelRewrite, *apixv1alpha2.InferenceModelRewriteList, *applyconfigurationapixv1alpha2.InferenceModelRewriteApplyConfiguration] +} + +// newInferenceModelRewrites returns a InferenceModelRewrites +func newInferenceModelRewrites(c *XInferenceV1alpha2Client, namespace string) *inferenceModelRewrites { + return &inferenceModelRewrites{ + gentype.NewClientWithListAndApply[*apixv1alpha2.InferenceModelRewrite, *apixv1alpha2.InferenceModelRewriteList, *applyconfigurationapixv1alpha2.InferenceModelRewriteApplyConfiguration]( + "inferencemodelrewrites", + c.RESTClient(), + scheme.ParameterCodec, + namespace, + func() *apixv1alpha2.InferenceModelRewrite { return &apixv1alpha2.InferenceModelRewrite{} }, + func() *apixv1alpha2.InferenceModelRewriteList { return &apixv1alpha2.InferenceModelRewriteList{} }, + ), + } +} diff --git a/client-go/informers/externalversions/apix/v1alpha2/inferencemodelrewrite.go b/client-go/informers/externalversions/apix/v1alpha2/inferencemodelrewrite.go new file mode 100644 index 000000000..c5f0b97ae --- /dev/null +++ b/client-go/informers/externalversions/apix/v1alpha2/inferencemodelrewrite.go @@ -0,0 +1,102 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by informer-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + context "context" + time "time" + + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" + gatewayapiinferenceextensionapixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + versioned "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned" + internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces" + apixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/listers/apix/v1alpha2" +) + +// InferenceModelRewriteInformer provides access to a shared informer and lister for +// InferenceModelRewrites. +type InferenceModelRewriteInformer interface { + Informer() cache.SharedIndexInformer + Lister() apixv1alpha2.InferenceModelRewriteLister +} + +type inferenceModelRewriteInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewInferenceModelRewriteInformer constructs a new informer for InferenceModelRewrite type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewInferenceModelRewriteInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredInferenceModelRewriteInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredInferenceModelRewriteInformer constructs a new informer for InferenceModelRewrite type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewFilteredInferenceModelRewriteInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.XInferenceV1alpha2().InferenceModelRewrites(namespace).List(context.Background(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.XInferenceV1alpha2().InferenceModelRewrites(namespace).Watch(context.Background(), options) + }, + ListWithContextFunc: func(ctx context.Context, options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.XInferenceV1alpha2().InferenceModelRewrites(namespace).List(ctx, options) + }, + WatchFuncWithContext: func(ctx context.Context, options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.XInferenceV1alpha2().InferenceModelRewrites(namespace).Watch(ctx, options) + }, + }, + &gatewayapiinferenceextensionapixv1alpha2.InferenceModelRewrite{}, + resyncPeriod, + indexers, + ) +} + +func (f 
*inferenceModelRewriteInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredInferenceModelRewriteInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *inferenceModelRewriteInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&gatewayapiinferenceextensionapixv1alpha2.InferenceModelRewrite{}, f.defaultInformer) +} + +func (f *inferenceModelRewriteInformer) Lister() apixv1alpha2.InferenceModelRewriteLister { + return apixv1alpha2.NewInferenceModelRewriteLister(f.Informer().GetIndexer()) +} diff --git a/client-go/informers/externalversions/apix/v1alpha2/interface.go b/client-go/informers/externalversions/apix/v1alpha2/interface.go index 9f6981658..11ccf5c87 100644 --- a/client-go/informers/externalversions/apix/v1alpha2/interface.go +++ b/client-go/informers/externalversions/apix/v1alpha2/interface.go @@ -24,6 +24,8 @@ import ( // Interface provides access to all the informers in this group version. type Interface interface { + // InferenceModelRewrites returns a InferenceModelRewriteInformer. + InferenceModelRewrites() InferenceModelRewriteInformer // InferenceObjectives returns a InferenceObjectiveInformer. InferenceObjectives() InferenceObjectiveInformer // InferencePools returns a InferencePoolInformer. @@ -41,6 +43,11 @@ func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakList return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} } +// InferenceModelRewrites returns a InferenceModelRewriteInformer. +func (v *version) InferenceModelRewrites() InferenceModelRewriteInformer { + return &inferenceModelRewriteInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} + // InferenceObjectives returns a InferenceObjectiveInformer. 
func (v *version) InferenceObjectives() InferenceObjectiveInformer { return &inferenceObjectiveInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} diff --git a/client-go/informers/externalversions/generic.go b/client-go/informers/externalversions/generic.go index 2fe29156b..5eeaea4d3 100644 --- a/client-go/informers/externalversions/generic.go +++ b/client-go/informers/externalversions/generic.go @@ -63,6 +63,8 @@ func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource return &genericInformer{resource: resource.GroupResource(), informer: f.XInference().V1alpha1().InferencePoolImports().Informer()}, nil // Group=inference.networking.x-k8s.io, Version=v1alpha2 + case v1alpha2.SchemeGroupVersion.WithResource("inferencemodelrewrites"): + return &genericInformer{resource: resource.GroupResource(), informer: f.XInference().V1alpha2().InferenceModelRewrites().Informer()}, nil case v1alpha2.SchemeGroupVersion.WithResource("inferenceobjectives"): return &genericInformer{resource: resource.GroupResource(), informer: f.XInference().V1alpha2().InferenceObjectives().Informer()}, nil case v1alpha2.SchemeGroupVersion.WithResource("inferencepools"): diff --git a/client-go/listers/apix/v1alpha2/expansion_generated.go b/client-go/listers/apix/v1alpha2/expansion_generated.go index 09f6d1de3..b20936e0b 100644 --- a/client-go/listers/apix/v1alpha2/expansion_generated.go +++ b/client-go/listers/apix/v1alpha2/expansion_generated.go @@ -18,6 +18,14 @@ limitations under the License. package v1alpha2 +// InferenceModelRewriteListerExpansion allows custom methods to be added to +// InferenceModelRewriteLister. +type InferenceModelRewriteListerExpansion interface{} + +// InferenceModelRewriteNamespaceListerExpansion allows custom methods to be added to +// InferenceModelRewriteNamespaceLister. 
+type InferenceModelRewriteNamespaceListerExpansion interface{} + // InferenceObjectiveListerExpansion allows custom methods to be added to // InferenceObjectiveLister. type InferenceObjectiveListerExpansion interface{} diff --git a/client-go/listers/apix/v1alpha2/inferencemodelrewrite.go b/client-go/listers/apix/v1alpha2/inferencemodelrewrite.go new file mode 100644 index 000000000..8209c2971 --- /dev/null +++ b/client-go/listers/apix/v1alpha2/inferencemodelrewrite.go @@ -0,0 +1,70 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" + apixv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" +) + +// InferenceModelRewriteLister helps list InferenceModelRewrites. +// All objects returned here must be treated as read-only. +type InferenceModelRewriteLister interface { + // List lists all InferenceModelRewrites in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*apixv1alpha2.InferenceModelRewrite, err error) + // InferenceModelRewrites returns an object that can list and get InferenceModelRewrites. 
+ InferenceModelRewrites(namespace string) InferenceModelRewriteNamespaceLister + InferenceModelRewriteListerExpansion +} + +// inferenceModelRewriteLister implements the InferenceModelRewriteLister interface. +type inferenceModelRewriteLister struct { + listers.ResourceIndexer[*apixv1alpha2.InferenceModelRewrite] +} + +// NewInferenceModelRewriteLister returns a new InferenceModelRewriteLister. +func NewInferenceModelRewriteLister(indexer cache.Indexer) InferenceModelRewriteLister { + return &inferenceModelRewriteLister{listers.New[*apixv1alpha2.InferenceModelRewrite](indexer, apixv1alpha2.Resource("inferencemodelrewrite"))} +} + +// InferenceModelRewrites returns an object that can list and get InferenceModelRewrites. +func (s *inferenceModelRewriteLister) InferenceModelRewrites(namespace string) InferenceModelRewriteNamespaceLister { + return inferenceModelRewriteNamespaceLister{listers.NewNamespaced[*apixv1alpha2.InferenceModelRewrite](s.ResourceIndexer, namespace)} +} + +// InferenceModelRewriteNamespaceLister helps list and get InferenceModelRewrites. +// All objects returned here must be treated as read-only. +type InferenceModelRewriteNamespaceLister interface { + // List lists all InferenceModelRewrites in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*apixv1alpha2.InferenceModelRewrite, err error) + // Get retrieves the InferenceModelRewrite from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*apixv1alpha2.InferenceModelRewrite, error) + InferenceModelRewriteNamespaceListerExpansion +} + +// inferenceModelRewriteNamespaceLister implements the InferenceModelRewriteNamespaceLister +// interface. 
+type inferenceModelRewriteNamespaceLister struct { + listers.ResourceIndexer[*apixv1alpha2.InferenceModelRewrite] +} diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencemodelrewrites.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencemodelrewrites.yaml new file mode 100644 index 000000000..585a40018 --- /dev/null +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencemodelrewrites.yaml @@ -0,0 +1,236 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + inference.networking.k8s.io/bundle-version: main-dev + name: inferencemodelrewrites.inference.networking.x-k8s.io +spec: + group: inference.networking.x-k8s.io + names: + kind: InferenceModelRewrite + listKind: InferenceModelRewriteList + plural: inferencemodelrewrites + singular: inferencemodelrewrite + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.poolRef.name + name: Inference Pool + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha2 + schema: + openAPIV3Schema: + description: InferenceModelRewrite is the Schema for the InferenceModelRewrite + API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: InferenceModelRewriteSpec defines the desired state of InferenceModelRewrite. + properties: + poolRef: + description: PoolRef is a reference to the inference pool. + properties: + group: + default: inference.networking.k8s.io + description: Group is the group of the referent. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: InferencePool + description: Kind is kind of the referent. For example "InferencePool". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + required: + - name + type: object + rules: + items: + description: |- + InferenceModelRewriteRule defines the match criteria and corresponding action. + + A specific model name can only be matched by one rule across all + policies attached to the same InferencePool. If multiple policies attempt + to match the same model name, the oldest policy (by creationTimestamp) + will be the only one considered valid. Newer policies with conflicting + matches will be marked as invalid in their status. + properties: + matches: + items: + description: Match defines the criteria for matching the LLM + requests. + properties: + model: + description: |- + Model specifies the criteria for matching the 'model' field + within the JSON request body. + properties: + type: + default: Exact + description: |- + Type specifies the kind of string matching to use. + Supported value is "Exact". Defaults to "Exact". + enum: + - Exact + type: string + value: + description: Value is the model name string to match + against. 
+ minLength: 1 + type: string + required: + - value + type: object + required: + - model + type: object + type: array + split: + items: + description: TargetModel defines a weighted model destination + for traffic distribution. + properties: + modelRewrite: + type: string + weight: + description: |- + (The following comment is copied from the original targetModel) + Weight is used to determine the proportion of traffic that should be + sent to this model when multiple target models are specified. + + Weight defines the proportion of requests forwarded to the specified + model. This is computed as weight/(sum of all weights in this + TargetModels list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If a weight is set for any targetModel, it must be set for all targetModels. + Conversely weights are optional, so long as ALL targetModels do not specify a weight. + format: int32 + maximum: 1000000 + minimum: 1 + type: integer + type: object + minItems: 1 + type: array + type: object + type: array + required: + - poolRef + - rules + type: object + status: + description: InferenceModelRewriteStatus defines the observed state of + InferenceModelRewrite. + properties: + conditions: + default: + - lastTransitionTime: "1970-01-01T00:00:00Z" + message: Waiting for controller + reason: Pending + status: Unknown + type: Accepted + description: |- + Conditions track the state of the InferenceModelRewrite. + + Known condition types are: + + * "Accepted" + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. 
If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: null + storedVersions: null diff --git a/docs/proposals/1816-inferenceomodelrewrite/README.md b/docs/proposals/1816-inferenceomodelrewrite/README.md new file mode 100644 index 000000000..87093c713 --- /dev/null +++ b/docs/proposals/1816-inferenceomodelrewrite/README.md @@ -0,0 +1,235 @@ +# Inference Pool Level Model Name Redirect and Traffic Splitting + +**Status:** Proposed + +For the full, detailed proposal, please see the [original proposal](https://docs.google.com/document/d/12yR_nAWM-Tg2ZmgGYX1h-dlUNi0AqYoACUjNElipl0M/edit?usp=sharing). + +## Summary + +The original `InferenceModel` API ([v1alpha2](https://github.com/capri-xiyue/gateway-api-inference-extension/blob/0189c333c2d4076f099fda81bc37f41996426fa9/apix/v1alpha2/inferencemodel_types.go)) provided initial support for model routing. This proposal reintroduces and enhances the core functionalities of model name redirection and traffic splitting within an inference pool. This is essential for modern use cases such as model name aliasing/versioning and granular traffic splitting for gradual rollouts. + +### Goals +* Enable configurable model redirection/aliasing within an `InferencePool`. +* Enable traffic splitting within an `InferencePool`. + +### Non-Goals +* Traffic splitting between `InferencePools` will be handled by `HTTPRoute`. + +## Proposal + +We propose introducing a new Custom Resource Definition (CRD), `InferenceModelRewrite`, to define rules for model redirection and traffic splitting. 
The Endpoint Picker Proxy (EPP) will be responsible for watching these resources and performing the necessary request body mutations and traffic distribution. + +This approach provides the cleanest separation of concerns: +* **BBR / `HTTPRoute`:** Handles "global" (inter-pool) routing. It directs the user-facing model name to the correct logical `InferencePool`. +* **EPP / `InferenceModelRewrite`:** Manages "local" (intra-pool) implementation details, such as routing to different model versions (`v1` vs. `v2`) within that pool. + +For a more detailed discussion on the execution architecture (BBR vs. EPP) and naming conventions, please refer to the [original proposal document](https://docs.google.com/document/d/12yR_nAWM-Tg2ZmgGYX1h-dlUNi0AqYoACUjNElipl0M/edit?usp=sharing). + +### CRD Specification + +```go +// InferenceModelRewrite is the Schema for the InferenceModelRewrite API. +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` +// +genclient +type InferenceModelRewrite struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec InferenceModelRewriteSpec `json:"spec,omitempty"` + Status InferenceModelRewriteStatus `json:"status,omitempty"` +} + +// InferenceModelRewriteList contains a list of InferenceModelRewrite. +// +// +kubebuilder:object:root=true +type InferenceModelRewriteList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []InferenceModelRewrite `json:"items"` +} + +// InferenceModelRewriteSpec defines the desired state of InferenceModelRewrite. +type InferenceModelRewriteSpec struct { + // PoolRef is a reference to the inference pool. 
+ // +kubebuilder:validation:Required + PoolRef PoolObjectReference `json:"poolRef"` + + // Rules are the ordered set of rules for rewriting inference requests. + // The first rule to match a request will be used. + // + // --- Precedence and Conflict Resolution --- + // If multiple InferenceModelRewrite resources target the same + // InferencePool, the controller will merge them based on precedence. + // + // **Timestamp Wins:** If two rules from different policies both match, + // the rule from the *oldest* + // InferenceModelRewrite resource (determined by + // metadata.creationTimestamp) will be used. + // +required + Rules []InferenceModelRewriteRule `json:"rules"` +} + +// InferenceModelRewriteRule defines the match criteria and corresponding action. +// +// A specific model name can only be matched by one rule across all +// policies attached to the same InferencePool. If multiple policies attempt +// to match the same model name, the oldest policy (by creationTimestamp) +// will be the only one considered valid. Newer policies with conflicting +// matches will be marked as invalid in their status. +type InferenceModelRewriteRule struct { + // Matches defines the criteria for matching a request. + // If multiple match criteria are specified, a request matches if + // ANY of the criteria are satisfied (logical OR). + // If empty, the rule matches all requests. + + // +optional + Matches []Match `json:"matches,omitempty"` + + // --- Actions --- + // Targets defines how to distribute traffic across a set of + // weighted model targets. This is used for traffic splitting, A/B tests, + // or canary rollouts. + // +optional + // +kubebuilder:validation:MinItems=1 + // + Targets []TargetModel `json:"split,omitempty"` +} + +// TargetModel defines a weighted model destination for traffic distribution. 
+type TargetModel struct { + // (The following comment is copied from the original targetModel) + // Weight is used to determine the proportion of traffic that should be + // sent to this model when multiple target models are specified. + // + // Weight defines the proportion of requests forwarded to the specified + // model. This is computed as weight/(sum of all weights in this + // TargetModels list). For non-zero values, there may be some epsilon from + // the exact proportion defined here depending on the precision an + // implementation supports. Weight is not a percentage and the sum of + // weights does not need to equal 100. + // + // If a weight is set for any targetModel, it must be set for all targetModels. + // Conversely weights are optional, so long as ALL targetModels do not specify a weight. + // + // +optional + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=1000000 + Weight int32 `json:"weight"` + + // --- Destination Types --- + // ModelRewrite specifies a static model name destination. + // +optional + ModelRewrite string `json:"modelRewrite"` +} + +// Match defines the criteria for matching the LLM requests. +type Match struct { + // Model specifies the criteria for matching the 'model' field + // within the JSON request body. + // +required + Model *ModelMatch `json:"model,omitempty"` +} + +// ModelMatch defines how to match against the model name in the request body. +type ModelMatch struct { + // Type specifies the kind of string matching to use. + // Supported value is "Exact". Defaults to "Exact". + // +optional + // +kubebuilder:default=Exact + Type *MatchValidationType `json:"type,omitempty"` + + // Value is the model name string to match against. + // +required + // +kubebuilder:validation:MinLength=1 + Value string `json:"value"` +} + +// MatchValidationType specifies the type of string matching to use. 
+// +kubebuilder:validation:Enum=Exact +type MatchValidationType string + +const ( + // MatchExact indicates that the model name must match exactly. + MatchExact MatchValidationType = "Exact" +) + +// InferenceModelRewriteStatus defines the observed state of InferenceModelRewrite. +type InferenceModelRewriteStatus struct { + // Conditions track the state of the InferenceModelRewrite. + // + // Known condition types are: + // + // * "Accepted" + // + // +optional + // +listType=map + // +listMapKey=type + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// InferenceModelRewriteConditionType is a type of condition for the InferenceModelRewrite. +type InferenceModelRewriteConditionType string + +// InferenceModelRewriteConditionReason is the reason for a given InferenceModelRewriteConditionType. +type InferenceModelRewriteConditionReason string + +const ( + // RewriteConditionAccepted indicates if the rewrite policy is accepted, and if not, why. + // This is the primary condition for this resource. + // + // Possible reasons for this condition to be True are: + // + // * "Accepted" + // + // Possible reasons for this condition to be Unknown are: + // + // * "Pending" + // + RewriteConditionAccepted InferenceModelRewriteConditionType = "Accepted" + + // RewriteReasonAccepted indicates the policy is valid, non-conflicting, + // and has been successfully applied to the inference pool. + RewriteReasonAccepted InferenceModelRewriteConditionReason = "Accepted" + + // RewriteReasonPending is the initial state, and indicates that the + // controller has not yet reconciled the InferenceModelRewrite. 
+ RewriteReasonPending InferenceModelRewriteConditionReason = "Pending" +) +``` + +### Example: Traffic Splitting + +```yaml +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferenceModelRewrite +metadata: + name: food-review-canary-rollout +spec: + poolRef: + name: main-food-review-pool + rules: + - matches: + - model: + type: Exact + value: "foodreview" + split: + - modelRewrite: "foodreview-v1" + weight: 10 + - modelRewrite: "foodreview-v2" + weight: 90 +``` + +## Implementation Phases + +* **Phase 1: EPP-Driven Intra-Pool Rewrite:** This phase delivers the core functionality for the most common use case: model rewrite and traffic splitting within a single `InferencePool`. + * The `InferenceModelRewrite` CRD will be created. + * The EPP will be enhanced to be a read-only controller for this CRD, executing the request body mutation and traffic splitting. + * **Key Point:** The EPP will be a **read-only** consumer of the CRD and will not write to its status field. Status updates (e.g., marking conflicts) must be handled by a new, separate controller if needed. + +* **Phase 2 (Conditional): Promote Rewrite Logic to a Shared Library:** If user patterns show a strong need for inter-pool traffic splitting based on rewritten model names, the logic can be moved into a shared library used by both BBR and EPP. + * An idempotency mechanism, like an `X-Gateway-Model-Name` header, would be added to prevent the EPP from re-executing the logic.