Skip to content

Commit 16e4dc4

Browse files
committed
feat: support inferencepool v1
Signed-off-by: bitliu <bitliu@tencent.com>
1 parent ec2b79c commit 16e4dc4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+318
-323
lines changed

.golangci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ linters:
5050
alias: egv1a1
5151
- pkg: github.com/envoyproxy/ai-gateway/api/v1alpha1
5252
alias: aigv1a1
53-
- pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2
54-
alias: gwaiev1a2
53+
- pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1
54+
alias: gwaiev1
5555
- pkg: k8s.io/apimachinery/pkg/apis/meta/v1
5656
alias: metav1
5757
- pkg: k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1

api/v1alpha1/ai_gateway_route.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ type AIGatewayRouteRule struct {
290290
// It can reference either an AIServiceBackend or an InferencePool resource.
291291
//
292292
// +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together"
293-
// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported"
293+
// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.k8s.io group is supported"
294294
type AIGatewayRouteRuleBackendRef struct {
295295
// Name is the name of the backend resource.
296296
// When Group and Kind are not specified, this refers to an AIServiceBackend.
@@ -302,7 +302,7 @@ type AIGatewayRouteRuleBackendRef struct {
302302

303303
// Group is the group of the backend resource.
304304
// When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
305-
// Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
305+
// Currently, only "inference.networking.k8s.io" is supported for InferencePool resources.
306306
//
307307
// +optional
308308
// +kubebuilder:validation:MaxLength=253

api/v1alpha1/ai_gateway_route_helper.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const (
1515
defaultRequestTimeout gwapiv1.Duration = "60s"
1616

1717
// inferencePoolGroup is the API group for InferencePool resources.
18-
inferencePoolGroup = "inference.networking.x-k8s.io"
18+
inferencePoolGroup = "inference.networking.k8s.io"
1919
// inferencePoolKind is the kind for InferencePool resources.
2020
inferencePoolKind = "InferencePool"
2121
)

cmd/aigw/envoy-gateway-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ extensionApis:
2424
enableBackend: true
2525
extensionManager:
2626
backendResources:
27-
- group: inference.networking.x-k8s.io
27+
- group: inference.networking.k8s.io
2828
kind: InferencePool
2929
version: v1alpha2
3030
hooks:

docs/proposals/003-epp-integration-proposal/proposal.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ When request goes to envoyproxy, it goes to the http filter chain, the ext-proc
5050
The gRPC service info is pre-defined in [InferencePool](https://gateway-api-inference-extension.sigs.k8s.io/api-types/inferencepool/) extensionRef, giving an example below:
5151

5252
```
53-
apiVersion: inference.networking.x-k8s.io/v1alpha2
53+
apiVersion: inference.networking.k8s.io/v1
5454
kind: InferencePool
5555
metadata:
5656
name: vllm-llama3-8b-instruct
@@ -80,7 +80,7 @@ spec:
8080
name: inference-gateway
8181
rules:
8282
- backendRefs:
83-
- group: inference.networking.x-k8s.io
83+
- group: inference.networking.k8s.io
8484
kind: InferencePool
8585
name: vllm-llama3-8b-instruct
8686
matches:
@@ -208,7 +208,7 @@ This requires expanding the `AIGatewayRouteRuleBackendRef` with `BackendObjectRe
208208
+ When it matches vllm-llama3-8b-instruct goes to InferencePool `vllm-llama3-8b-instruct`
209209

210210
```
211-
apiVersion: inference.networking.x-k8s.io/v1alpha2
211+
apiVersion: inference.networking.k8s.io/v1
212212
kind: InferencePool
213213
metadata:
214214
name: vllm-llama3-8b-instruct
@@ -248,7 +248,7 @@ spec:
248248
value: vllm-llama3-8b-instruct
249249
backendRefs:
250250
- name: vllm-llama3-8b-instruct
251-
group: inference.networking.x-k8s.io
251+
group: inference.networking.k8s.io
252252
kind: InferencePool
253253
```
254254

@@ -268,7 +268,7 @@ This approach is preferred because InferencePool resources do not require Backen
268268
+ When it matches vllm-llama3-8b-instruct goes to AIServiceBackend `vllm-llama3-8b-instruct`
269269

270270
```yaml
271-
apiVersion: inference.networking.x-k8s.io/v1alpha2
271+
apiVersion: inference.networking.k8s.io/v1
272272
kind: InferencePool
273273
metadata:
274274
name: vllm-llama3-8b-instruct
@@ -318,7 +318,7 @@ spec:
318318
name: OpenAI
319319
backendRef:
320320
name: vllm-llama3-8b-instruct
321-
group: inference.networking.x-k8s.io
321+
group: inference.networking.k8s.io
322322
kind: InferencePool
323323
```
324324
@@ -383,7 +383,7 @@ It adds the cluster with override_host loadBalancingPolicy; we can add the h
383383
Take the configuration below as an example:
384384

385385
```yaml
386-
apiVersion: inference.networking.x-k8s.io/v1alpha2
386+
apiVersion: inference.networking.k8s.io/v1
387387
kind: InferencePool
388388
metadata:
389389
name: vllm-llama3-8b-instruct
@@ -416,7 +416,7 @@ spec:
416416
value: vllm-llama3-8b-instruct
417417
backendRefs:
418418
- name: vllm-llama3-8b-instruct
419-
group: inference.networking.x-k8s.io
419+
group: inference.networking.k8s.io
420420
kind: InferencePool
421421
```
422422

@@ -581,7 +581,7 @@ spec:
581581
name: x-ai-eg-model
582582
value: meta-llama/Llama-3.1-8B-Instruct
583583
backendRefs:
584-
- group: inference.networking.x-k8s.io
584+
- group: inference.networking.k8s.io
585585
kind: InferencePool
586586
name: vllm-llama3-8b-instruct
587587
- matches:
@@ -590,7 +590,7 @@ spec:
590590
name: x-ai-eg-model
591591
value: mistral:latest
592592
backendRefs:
593-
- group: inference.networking.x-k8s.io
593+
- group: inference.networking.k8s.io
594594
kind: InferencePool
595595
name: mistral
596596
- matches:
@@ -618,7 +618,7 @@ spec:
618618
namespace: default
619619
rules:
620620
- backendRefs:
621-
- group: inference.networking.x-k8s.io
621+
- group: inference.networking.k8s.io
622622
kind: InferencePool
623623
name: vllm-llama3-8b-instruct
624624
namespace: default

examples/inference-pool/aigwroute.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ spec:
5151
name: Authorization
5252
value: sk-zyxwvutsrqponmlkjihgfedcba
5353
backendRefs:
54-
- group: inference.networking.x-k8s.io
54+
- group: inference.networking.k8s.io
5555
kind: InferencePool
5656
name: vllm-llama3-8b-instruct
5757
- matches:
@@ -60,7 +60,7 @@ spec:
6060
name: x-ai-eg-model
6161
value: mistral:latest
6262
backendRefs:
63-
- group: inference.networking.x-k8s.io
63+
- group: inference.networking.k8s.io
6464
kind: InferencePool
6565
name: mistral
6666
- matches:

examples/inference-pool/base.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ spec:
4949
initialDelaySeconds: 1
5050
periodSeconds: 1
5151
---
52-
apiVersion: inference.networking.x-k8s.io/v1alpha2
52+
apiVersion: inference.networking.k8s.io/v1
5353
kind: InferencePool
5454
metadata:
5555
name: mistral
@@ -61,16 +61,16 @@ spec:
6161
extensionRef:
6262
name: mistral-epp
6363
---
64-
apiVersion: inference.networking.x-k8s.io/v1alpha2
65-
kind: InferenceModel
64+
apiVersion: inference.networking.k8s.io/v1
65+
kind: InferenceObjective
6666
metadata:
6767
name: mistral
6868
namespace: default
6969
spec:
7070
modelName: mistral:latest
7171
criticality: Critical
7272
poolRef:
73-
# Bind the InferenceModel to the InferencePool.
73+
# Bind the InferenceObjective to the InferencePool.
7474
name: mistral
7575
---
7676
apiVersion: v1
@@ -158,7 +158,7 @@ metadata:
158158
namespace: default
159159
data:
160160
default-plugins.yaml: |
161-
apiVersion: inference.networking.x-k8s.io/v1alpha1
161+
apiVersion: inference.networking.k8s.io/v1alpha1
162162
kind: EndpointPickerConfig
163163
plugins:
164164
- type: low-queue-filter
@@ -208,7 +208,7 @@ data:
208208
- pluginRef: low-latency-filter
209209
- pluginRef: random-picker
210210
plugins-v2.yaml: |
211-
apiVersion: inference.networking.x-k8s.io/v1alpha1
211+
apiVersion: inference.networking.k8s.io/v1alpha1
212212
kind: EndpointPickerConfig
213213
plugins:
214214
- type: queue-scorer
@@ -238,10 +238,10 @@ apiVersion: rbac.authorization.k8s.io/v1
238238
metadata:
239239
name: pod-read
240240
rules:
241-
- apiGroups: ["inference.networking.x-k8s.io"]
241+
- apiGroups: ["inference.networking.k8s.io"]
242242
resources: ["inferencepools"]
243243
verbs: ["get", "watch", "list"]
244-
- apiGroups: ["inference.networking.x-k8s.io"]
244+
- apiGroups: ["inference.networking.k8s.io"]
245245
resources: ["inferencemodels"]
246246
verbs: ["get", "watch", "list"]
247247
- apiGroups: [""]

examples/inference-pool/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ data:
4242
enableBackend: true
4343
extensionManager:
4444
backendResources:
45-
- group: inference.networking.x-k8s.io
45+
- group: inference.networking.k8s.io
4646
kind: InferencePool
4747
version: v1alpha2
4848
hooks:

examples/inference-pool/httproute.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ spec:
3535
namespace: default
3636
rules:
3737
- backendRefs:
38-
- group: inference.networking.x-k8s.io
38+
- group: inference.networking.k8s.io
3939
kind: InferencePool
4040
name: vllm-llama3-8b-instruct
4141
namespace: default

go.mod

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ require (
5757
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397
5858
sigs.k8s.io/controller-runtime v0.21.0
5959
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c
60-
sigs.k8s.io/gateway-api-inference-extension v0.4.0
60+
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250811034505-928e051eb49e
6161
sigs.k8s.io/yaml v1.6.0
6262
)
6363

@@ -178,7 +178,7 @@ require (
178178
github.com/ebitengine/purego v0.8.4 // indirect
179179
github.com/editorconfig-checker/editorconfig-checker/v3 v3.1.1 // indirect
180180
github.com/editorconfig/editorconfig-core-go/v2 v2.6.2 // indirect
181-
github.com/elastic/crd-ref-docs v0.1.0 // indirect
181+
github.com/elastic/crd-ref-docs v0.2.0 // indirect
182182
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
183183
github.com/emirpasic/gods v1.18.1 // indirect
184184
github.com/envoyproxy/go-control-plane/contrib v1.32.5-0.20250430092421-68a532e11403 // indirect
@@ -228,7 +228,7 @@ require (
228228
github.com/go-xmlfmt/xmlfmt v1.1.3 // indirect
229229
github.com/gobuffalo/flect v1.0.3 // indirect
230230
github.com/gobwas/glob v0.2.3 // indirect
231-
github.com/goccy/go-yaml v1.11.3 // indirect
231+
github.com/goccy/go-yaml v1.18.0 // indirect
232232
github.com/gofrs/flock v0.12.1 // indirect
233233
github.com/gogo/protobuf v1.3.2 // indirect
234234
github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
@@ -457,7 +457,7 @@ require (
457457
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0 // indirect
458458
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.37.0 // indirect
459459
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 // indirect
460-
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect
460+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 // indirect
461461
go.uber.org/automaxprocs v1.6.0 // indirect
462462
go.uber.org/multierr v1.11.0 // indirect
463463
go.yaml.in/yaml/v2 v2.4.2 // indirect
@@ -472,7 +472,6 @@ require (
472472
golang.org/x/text v0.27.0 // indirect
473473
golang.org/x/time v0.12.0 // indirect
474474
golang.org/x/tools v0.35.0 // indirect
475-
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
476475
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
477476
google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0 // indirect
478477
google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 // indirect
@@ -486,7 +485,9 @@ require (
486485
honnef.co/go/tools v0.6.1 // indirect
487486
k8s.io/apiserver v0.33.3 // indirect
488487
k8s.io/cli-runtime v0.33.3 // indirect
488+
k8s.io/code-generator v0.33.3 // indirect
489489
k8s.io/component-base v0.33.3 // indirect
490+
k8s.io/gengo/v2 v2.0.0-20250207200755-1244d31929d7 // indirect
490491
k8s.io/klog/v2 v2.130.1 // indirect
491492
k8s.io/kube-openapi v0.0.0-20250626002932-679f732ef8b8 // indirect
492493
k8s.io/kubectl v0.33.3 // indirect
@@ -495,7 +496,7 @@ require (
495496
oras.land/oras-go/v2 v2.6.0 // indirect
496497
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
497498
sigs.k8s.io/controller-runtime/tools/setup-envtest v0.0.0-20250217160221-5e8256e05002 // indirect
498-
sigs.k8s.io/controller-tools v0.17.3 // indirect
499+
sigs.k8s.io/controller-tools v0.18.0 // indirect
499500
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
500501
sigs.k8s.io/kind v0.29.0 // indirect
501502
sigs.k8s.io/kubectl-validate v0.0.5-0.20241223122011-eb064d2f92d5 // indirect

0 commit comments

Comments
 (0)