envoyproxy
diff --git a/‎.golangci.yml
Lines changed: 2 additions & 2 deletions b/‎.golangci.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎api/v1alpha1/ai_gateway_route.go
Lines changed: 2 additions & 2 deletions b/‎api/v1alpha1/ai_gateway_route.go
Lines changed: 2 additions & 2 deletions
diff --git a/‎api/v1alpha1/ai_gateway_route_helper.go
Lines changed: 1 addition & 1 deletion b/‎api/v1alpha1/ai_gateway_route_helper.go
Lines changed: 1 addition & 1 deletion
diff --git a/‎cmd/aigw/envoy-gateway-config.yaml
Lines changed: 1 addition & 1 deletion b/‎cmd/aigw/envoy-gateway-config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/proposals/003-epp-integration-proposal/proposal.md
Lines changed: 11 additions & 11 deletions b/‎docs/proposals/003-epp-integration-proposal/proposal.md
Lines changed: 11 additions & 11 deletions
diff --git a/‎examples/inference-pool/aigwroute.yaml
Lines changed: 2 additions & 2 deletions b/‎examples/inference-pool/aigwroute.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/inference-pool/base.yaml
Lines changed: 8 additions & 8 deletions b/‎examples/inference-pool/base.yaml
Lines changed: 8 additions & 8 deletions
diff --git a/‎examples/inference-pool/config.yaml
Lines changed: 1 addition & 1 deletion b/‎examples/inference-pool/config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/inference-pool/httproute.yaml
Lines changed: 1 addition & 1 deletion b/‎examples/inference-pool/httproute.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎go.mod
Lines changed: 7 additions & 6 deletions b/‎go.mod
Lines changed: 7 additions & 6 deletions
@@ -50,8 +50,8 @@ linters:
           alias: egv1a1
         - pkg: github.com/envoyproxy/ai-gateway/api/v1alpha1
           alias: aigv1a1
-        - pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2
-          alias: gwaiev1a2
+        - pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1
+          alias: gwaiev1
         - pkg: k8s.io/apimachinery/pkg/apis/meta/v1
           alias: metav1
         - pkg: k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1
 
@@ -290,7 +290,7 @@ type AIGatewayRouteRule struct {
 // It can reference either an AIServiceBackend or an InferencePool resource.
 //
 // +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together"
-// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported"
+// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.k8s.io group is supported"
 type AIGatewayRouteRuleBackendRef struct {
 	// Name is the name of the backend resource.
 	// When Group and Kind are not specified, this refers to an AIServiceBackend.
@@ -302,7 +302,7 @@ type AIGatewayRouteRuleBackendRef struct {
 
 	// Group is the group of the backend resource.
 	// When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
-	// Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
+	// Currently, only "inference.networking.k8s.io" is supported for InferencePool resources.
 	//
 	// +optional
 	// +kubebuilder:validation:MaxLength=253
 
@@ -15,7 +15,7 @@ const (
 	defaultRequestTimeout gwapiv1.Duration = "60s"
 
 	// inferencePoolGroup is the API group for InferencePool resources.
-	inferencePoolGroup = "inference.networking.x-k8s.io"
+	inferencePoolGroup = "inference.networking.k8s.io"
 	// inferencePoolKind is the kind for InferencePool resources.
 	inferencePoolKind = "InferencePool"
 )
 
@@ -24,7 +24,7 @@ extensionApis:
   enableBackend: true
 extensionManager:
   backendResources:
-    - group: inference.networking.x-k8s.io
+    - group: inference.networking.k8s.io
       kind: InferencePool
       version: v1alpha2
   hooks:
 
@@ -50,7 +50,7 @@ When request goes to envoyproxy, it goes to the http filter chain, the ext-proc
 The gRPC service info is pre-defined in [InferencePool](https://gateway-api-inference-extension.sigs.k8s.io/api-types/inferencepool/) extensionRef, giving an example below:
 
 ```
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
 kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct
@@ -80,7 +80,7 @@ spec:
     name: inference-gateway
   rules:
   - backendRefs:
-    - group: inference.networking.x-k8s.io
+    - group: inference.networking.k8s.io
       kind: InferencePool
       name: vllm-llama3-8b-instruct
     matches:
@@ -208,7 +208,7 @@ This requires to expand the `AIGatewayRouteRuleBackendRef` with `BackendObjectRe
 + When it matches vllm-llama3-8b-instruct goes to InferencePool `vllm-llama3-8b-instruct`
 
 ```
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
 kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct
@@ -248,7 +248,7 @@ spec:
               value: vllm-llama3-8b-instruct
       backendRefs:
         - name: vllm-llama3-8b-instruct
-        	group: inference.networking.x-k8s.io
+          group: inference.networking.k8s.io
           kind: InferencePool
 ```
 
@@ -268,7 +268,7 @@ This approach is preferred because InferencePool resources do not require Backen
 + When it matches vllm-llama3-8b-instruct goes to AIServiceBackend `vllm-llama3-8b-instruct`
 
 ```yaml
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
 kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct
@@ -318,7 +318,7 @@ spec:
     name: OpenAI
   backendRef:
     name: vllm-llama3-8b-instruct
-    group: inference.networking.x-k8s.io
+    group: inference.networking.k8s.io
     kind: InferencePool
 ```
 
@@ -383,7 +383,7 @@ It adds the cluster with override_host loadBalancingPolicy, we can add the h
 Take the configuration below as an example:
 
 ```yaml
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
 kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct
@@ -416,7 +416,7 @@ spec:
               value: vllm-llama3-8b-instruct
       backendRefs:
         - name: vllm-llama3-8b-instruct
-        	group: inference.networking.x-k8s.io
+          group: inference.networking.k8s.io
           kind: InferencePool
 ```
 
@@ -581,7 +581,7 @@ spec:
               name: x-ai-eg-model
               value: meta-llama/Llama-3.1-8B-Instruct
       backendRefs:
-        - group: inference.networking.x-k8s.io
+        - group: inference.networking.k8s.io
           kind: InferencePool
           name: vllm-llama3-8b-instruct
     - matches:
@@ -590,7 +590,7 @@ spec:
               name: x-ai-eg-model
               value: mistral:latest
       backendRefs:
-        - group: inference.networking.x-k8s.io
+        - group: inference.networking.k8s.io
           kind: InferencePool
           name: mistral
     - matches:
@@ -618,7 +618,7 @@ spec:
       namespace: default
   rules:
     - backendRefs:
-        - group: inference.networking.x-k8s.io
+        - group: inference.networking.k8s.io
           kind: InferencePool
           name: vllm-llama3-8b-instruct
           namespace: default
 
@@ -51,7 +51,7 @@ spec:
               name: Authorization
               value: sk-zyxwvutsrqponmlkjihgfedcba
       backendRefs:
-        - group: inference.networking.x-k8s.io
+        - group: inference.networking.k8s.io
           kind: InferencePool
           name: vllm-llama3-8b-instruct
     - matches:
@@ -60,7 +60,7 @@ spec:
               name: x-ai-eg-model
               value: mistral:latest
       backendRefs:
-        - group: inference.networking.x-k8s.io
+        - group: inference.networking.k8s.io
           kind: InferencePool
           name: mistral
     - matches:
 
@@ -49,7 +49,7 @@ spec:
             initialDelaySeconds: 1
             periodSeconds: 1
 ---
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
 kind: InferencePool
 metadata:
   name: mistral
@@ -61,16 +61,16 @@ spec:
   extensionRef:
     name: mistral-epp
 ---
-apiVersion: inference.networking.x-k8s.io/v1alpha2
-kind: InferenceModel
+apiVersion: inference.networking.k8s.io/v1
+kind: InferenceObjective
 metadata:
   name: mistral
   namespace: default
 spec:
   modelName: mistral:latest
   criticality: Critical
   poolRef:
-    # Bind the InferenceModel to the InferencePool.
+    # Bind the InferenceObjective to the InferencePool.
     name: mistral
 ---
 apiVersion: v1
@@ -158,7 +158,7 @@ metadata:
   namespace: default
 data:
   default-plugins.yaml: |
-    apiVersion: inference.networking.x-k8s.io/v1alpha1
+    apiVersion: inference.networking.k8s.io/v1alpha1
     kind: EndpointPickerConfig
     plugins:
     - type: low-queue-filter
@@ -208,7 +208,7 @@ data:
       - pluginRef: low-latency-filter
       - pluginRef: random-picker
   plugins-v2.yaml: |
-    apiVersion: inference.networking.x-k8s.io/v1alpha1
+    apiVersion: inference.networking.k8s.io/v1alpha1
     kind: EndpointPickerConfig
     plugins:
     - type: queue-scorer
@@ -238,10 +238,10 @@ apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: pod-read
 rules:
-  - apiGroups: ["inference.networking.x-k8s.io"]
+  - apiGroups: ["inference.networking.k8s.io"]
     resources: ["inferencepools"]
     verbs: ["get", "watch", "list"]
-  - apiGroups: ["inference.networking.x-k8s.io"]
+  - apiGroups: ["inference.networking.k8s.io"]
     resources: ["inferencemodels"]
     verbs: ["get", "watch", "list"]
   - apiGroups: [""]
 
@@ -42,7 +42,7 @@ data:
       enableBackend: true
     extensionManager:
       backendResources:
-        - group: inference.networking.x-k8s.io
+        - group: inference.networking.k8s.io
           kind: InferencePool
           version: v1alpha2
       hooks:
 
@@ -35,7 +35,7 @@ spec:
       namespace: default
   rules:
     - backendRefs:
-        - group: inference.networking.x-k8s.io
+        - group: inference.networking.k8s.io
           kind: InferencePool
           name: vllm-llama3-8b-instruct
           namespace: default
 
@@ -57,7 +57,7 @@ require (
 	k8s.io/utils v0.0.0-20250604170112-4c0f3b243397
 	sigs.k8s.io/controller-runtime v0.21.0
 	sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c
-	sigs.k8s.io/gateway-api-inference-extension v0.4.0
+	sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250811034505-928e051eb49e
 	sigs.k8s.io/yaml v1.6.0
 )
 
@@ -178,7 +178,7 @@ require (
 	github.com/ebitengine/purego v0.8.4 // indirect
 	github.com/editorconfig-checker/editorconfig-checker/v3 v3.1.1 // indirect
 	github.com/editorconfig/editorconfig-core-go/v2 v2.6.2 // indirect
-	github.com/elastic/crd-ref-docs v0.1.0 // indirect
+	github.com/elastic/crd-ref-docs v0.2.0 // indirect
 	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
 	github.com/emirpasic/gods v1.18.1 // indirect
 	github.com/envoyproxy/go-control-plane/contrib v1.32.5-0.20250430092421-68a532e11403 // indirect
@@ -228,7 +228,7 @@ require (
 	github.com/go-xmlfmt/xmlfmt v1.1.3 // indirect
 	github.com/gobuffalo/flect v1.0.3 // indirect
 	github.com/gobwas/glob v0.2.3 // indirect
-	github.com/goccy/go-yaml v1.11.3 // indirect
+	github.com/goccy/go-yaml v1.18.0 // indirect
 	github.com/gofrs/flock v0.12.1 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
@@ -457,7 +457,7 @@ require (
 	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.37.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 // indirect
 	go.uber.org/automaxprocs v1.6.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
 	go.yaml.in/yaml/v2 v2.4.2 // indirect
@@ -472,7 +472,6 @@ require (
 	golang.org/x/text v0.27.0 // indirect
 	golang.org/x/time v0.12.0 // indirect
 	golang.org/x/tools v0.35.0 // indirect
-	golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 // indirect
@@ -486,7 +485,9 @@ require (
 	honnef.co/go/tools v0.6.1 // indirect
 	k8s.io/apiserver v0.33.3 // indirect
 	k8s.io/cli-runtime v0.33.3 // indirect
+	k8s.io/code-generator v0.33.3 // indirect
 	k8s.io/component-base v0.33.3 // indirect
+	k8s.io/gengo/v2 v2.0.0-20250207200755-1244d31929d7 // indirect
 	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20250626002932-679f732ef8b8 // indirect
 	k8s.io/kubectl v0.33.3 // indirect
@@ -495,7 +496,7 @@ require (
 	oras.land/oras-go/v2 v2.6.0 // indirect
 	sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
 	sigs.k8s.io/controller-runtime/tools/setup-envtest v0.0.0-20250217160221-5e8256e05002 // indirect
-	sigs.k8s.io/controller-tools v0.17.3 // indirect
+	sigs.k8s.io/controller-tools v0.18.0 // indirect
 	sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
 	sigs.k8s.io/kind v0.29.0 // indirect
 	sigs.k8s.io/kubectl-validate v0.0.5-0.20241223122011-eb064d2f92d5 // indirect
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ const (`
`15`	`15`	`defaultRequestTimeout gwapiv1.Duration = "60s"`
`16`	`16`
`17`	`17`	`// inferencePoolGroup is the API group for InferencePool resources.`
`18`		`- inferencePoolGroup = "inference.networking.x-k8s.io"`
	`18`	`+ inferencePoolGroup = "inference.networking.k8s.io"`
`19`	`19`	`// inferencePoolKind is the kind for InferencePool resources.`
`20`	`20`	`inferencePoolKind = "InferencePool"`
`21`	`21`	`)`