Skip to content

Commit 4f9c99f

Browse files
committed
Add inferencemodelrewrite api.
1 parent e20606e commit 4f9c99f

25 files changed

+1756
-21
lines changed
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha2
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// InferenceModelRewrite is the Schema for the InferenceModelRewrite API.
24+
// +kubebuilder:object:root=true
25+
// +kubebuilder:subresource:status
26+
// +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name`
27+
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
28+
// +genclient
29+
type InferenceModelRewrite struct {
30+
metav1.TypeMeta `json:",inline"`
31+
metav1.ObjectMeta `json:"metadata,omitempty"`
32+
33+
Spec InferenceModelRewriteSpec `json:"spec,omitempty"`
34+
Status InferenceModelRewriteStatus `json:"status,omitempty"`
35+
}
36+
37+
// InferenceModelRewriteList contains a list of InferenceModelRewrite.
38+
//
39+
// +kubebuilder:object:root=true
40+
type InferenceModelRewriteList struct {
41+
metav1.TypeMeta `json:",inline"`
42+
metav1.ListMeta `json:"metadata,omitempty"`
43+
Items []InferenceModelRewrite `json:"items"`
44+
}
45+
46+
// InferenceModelRewriteSpec defines the desired state of InferenceModelRewrite.
47+
type InferenceModelRewriteSpec struct {
48+
// PoolRef is a reference to the inference pool.
49+
// +kubebuilder:validation:Required
50+
PoolRef PoolObjectReference `json:"poolRef"`
51+
52+
// Rules are the ordered set of rules for rewriting inference requests.
53+
// The first rule to match a request will be used.
54+
55+
//
56+
// --- Precedence and Conflict Resolution ---
57+
// If multiple InferenceModelRewrite resources target the same
58+
// InferencePool, the controller will merge them based on precedence.
59+
//
60+
// **Timestamp Wins:** If two rules from different policies all matches,
61+
// the rule from the *oldest*
62+
// InferenceModelRewrite resource (determined by
63+
// metadata.creationTimestamp) will be used.
64+
// +required
65+
Rules []InferenceModelRewriteRule `json:"rules"`
66+
}
67+
68+
// InferenceModelRewriteRule defines the match criteria and corresponding action.
69+
//
70+
// A specific model name can only be matched by one rule across all
71+
// policies attached to the same InferencePool. If multiple policies attempt
72+
// to match the same model name, the oldest policy (by creationTimestamp)
73+
// will be the only one considered valid. Newer policies with conflicting
74+
// matches will be marked as invalid in their status.
75+
type InferenceModelRewriteRule struct {
76+
// Matches defines the criteria for matching a request.
77+
// If multiple match criteria are specified, a request matches if
78+
// ANY of the criteria are satisfied (logical OR).
79+
// If empty, the rule matches all requests.
80+
81+
// +optional
82+
Matches []Match `json:"matches,omitempty"`
83+
84+
// --- Actions ---
85+
// Targets defines how to distribute traffic across a set of
86+
// weighted model targets. This is used for traffic splitting, A/B tests,
87+
// or canary rollouts.
88+
// +optional
89+
// +kubebuilder:validation:MinItems=1
90+
//
91+
Targets []TargetModel `json:"split,omitempty"`
92+
}
93+
94+
// TargetModel defines a weighted model destination for traffic distribution.
type TargetModel struct {
	// Weight is used to determine the proportion of traffic that should be
	// sent to this model when multiple target models are specified.
	//
	// Weight defines the proportion of requests forwarded to the specified
	// model. This is computed as weight/(sum of all weights in this
	// TargetModels list). For non-zero values, there may be some epsilon from
	// the exact proportion defined here depending on the precision an
	// implementation supports. Weight is not a percentage and the sum of
	// weights does not need to equal 100.
	//
	// If a weight is set for any targetModel, it must be set for all targetModels.
	// Conversely weights are optional, so long as ALL targetModels do not specify a weight.
	//
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=1000000
	Weight int32 `json:"weight,omitempty"` // unset (0) is omitted; an explicit 0 would fail Minimum=1

	// --- Destination Types ---
	// ModelRewrite specifies a static model name destination.
	// +optional
	ModelRewrite string `json:"modelRewrite"`
}
120+
121+
// Match defines the criteria for matching the LLM requests.
122+
type Match struct {
123+
// Model specifies the criteria for matching the 'model' field
124+
// within the JSON request body.
125+
// +required
126+
Model *ModelMatch `json:"model,omitempty"`
127+
}
128+
129+
// ModelMatch defines how to match against the model name in the request body.
130+
type ModelMatch struct {
131+
// Type specifies the kind of string matching to use.
132+
// Supported value is "Exact". Defaults to "Exact".
133+
// +optional
134+
// +kubebuilder:default=Exact
135+
Type *MatchValidationType `json:"type,omitempty"`
136+
137+
// Value is the model name string to match against.
138+
// +required
139+
// +kubebuilder:validation:MinLength=1
140+
Value string `json:"value"`
141+
}
142+
143+
// MatchValidationType specifies the type of string matching to use.
// +kubebuilder:validation:Enum=Exact
type MatchValidationType string

const (
	// MatchExact indicates that the model name must match exactly.
	MatchExact MatchValidationType = "Exact"
)
151+
152+
// InferenceModelRewriteStatus defines the observed state of InferenceModelRewrite.
153+
type InferenceModelRewriteStatus struct {
154+
// Conditions track the state of the InferenceModelRewrite.
155+
//
156+
// Known condition types are:
157+
//
158+
// * "Accepted"
159+
//
160+
// +optional
161+
// +listType=map
162+
// +listMapKey=type
163+
// +kubebuilder:validation:MaxItems=8
164+
// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
165+
Conditions []metav1.Condition `json:"conditions,omitempty"`
166+
}
167+
168+
// InferenceModelRewriteConditionType is a type of condition for the InferenceModelRewrite.
type InferenceModelRewriteConditionType string

// InferenceModelRewriteConditionReason is the reason for a given InferenceModelRewriteConditionType.
type InferenceModelRewriteConditionReason string

const (
	// RewriteConditionAccepted indicates if the rewrite policy is accepted, and if not, why.
	// This is the primary condition for this resource.
	//
	// Possible reasons for this condition to be True are:
	//
	// * "Accepted"
	//
	// Possible reasons for this condition to be Unknown are:
	//
	// * "Pending"
	//
	RewriteConditionAccepted InferenceModelRewriteConditionType = "Accepted"

	// RewriteReasonAccepted indicates the policy is valid, non-conflicting,
	// and has been successfully applied to the inference pool.
	RewriteReasonAccepted InferenceModelRewriteConditionReason = "Accepted"

	// RewriteReasonPending is the initial state, and indicates that the
	// controller has not yet reconciled the InferenceModelRewrite.
	RewriteReasonPending InferenceModelRewriteConditionReason = "Pending"
)

apix/v1alpha2/inferenceobjective_types.go

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -78,27 +78,6 @@ type InferenceObjectiveSpec struct {
7878
PoolRef PoolObjectReference `json:"poolRef"`
7979
}
8080

81-
// PoolObjectReference identifies an API object within the namespace of the
82-
// referrer.
83-
type PoolObjectReference struct {
84-
// Group is the group of the referent.
85-
//
86-
// +optional
87-
// +kubebuilder:default="inference.networking.k8s.io"
88-
Group Group `json:"group,omitempty"`
89-
90-
// Kind is kind of the referent. For example "InferencePool".
91-
//
92-
// +optional
93-
// +kubebuilder:default="InferencePool"
94-
Kind Kind `json:"kind,omitempty"`
95-
96-
// Name is the name of the referent.
97-
//
98-
// +kubebuilder:validation:Required
99-
Name ObjectName `json:"name"`
100-
}
101-
10281
// InferenceObjectiveStatus defines the observed state of InferenceObjective
10382
type InferenceObjectiveStatus struct {
10483
// Conditions track the state of the InferenceObjective.

apix/v1alpha2/shared_types.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,24 @@ type LabelKey string
127127
// +kubebuilder:validation:MaxLength=63
128128
// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
129129
type LabelValue string
130+
131+
// PoolObjectReference identifies an API object within the namespace of the
132+
// referrer.
133+
type PoolObjectReference struct {
134+
// Group is the group of the referent.
135+
//
136+
// +optional
137+
// +kubebuilder:default="inference.networking.k8s.io"
138+
Group Group `json:"group,omitempty"`
139+
140+
// Kind is kind of the referent. For example "InferencePool".
141+
//
142+
// +optional
143+
// +kubebuilder:default="InferencePool"
144+
Kind Kind `json:"kind,omitempty"`
145+
146+
// Name is the name of the referent.
147+
//
148+
// +kubebuilder:validation:Required
149+
Name ObjectName `json:"name"`
150+
}

0 commit comments

Comments
 (0)