From 343e558ead71e227842f0143b662237bd93bb6db Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Wed, 29 Oct 2025 13:46:26 -0700 Subject: [PATCH 1/9] docs: Adding the Gateway inference support documentation for Nginx Gateway Fabric --- .../gateway/nginxgatewayfabric/gateway.yaml | 10 ++ .../gateway/nginxgatewayfabric/httproute.yaml | 17 +++ site-src/_includes/epp-latest.md | 10 ++ site-src/guides/getting-started-latest.md | 106 ++++++++++++++++++ site-src/implementations/gateways.md | 9 ++ 5 files changed, 152 insertions(+) create mode 100644 config/manifests/gateway/nginxgatewayfabric/gateway.yaml create mode 100644 config/manifests/gateway/nginxgatewayfabric/httproute.yaml diff --git a/config/manifests/gateway/nginxgatewayfabric/gateway.yaml b/config/manifests/gateway/nginxgatewayfabric/gateway.yaml new file mode 100644 index 000000000..d70ec98a3 --- /dev/null +++ b/config/manifests/gateway/nginxgatewayfabric/gateway.yaml @@ -0,0 +1,10 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: inference-gateway +spec: + gatewayClassName: nginx +listeners: +- name: http + port: 80 + protocol: HTTP diff --git a/config/manifests/gateway/nginxgatewayfabric/httproute.yaml b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml new file mode 100644 index 000000000..01c30ac5f --- /dev/null +++ b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml @@ -0,0 +1,17 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: +name: llm-route +namespace: default +spec: +parentRefs: +- name: inference-gateway +rules: +- matches: + - path: + type: PathPrefix + value: / + backendRefs: + - group: inference.networking.k8s.io + kind: InferencePool + name: vllm-llama3-8b-instruct \ No newline at end of file diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md index ef08a61be..b194f3afe 100644 --- a/site-src/_includes/epp-latest.md +++ b/site-src/_includes/epp-latest.md @@ -30,3 +30,13 @@ --version 
$IGW_CHART_VERSION \ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool ``` + +=== "Nginx Gateway Fabric" + + ```bash + export IGW_CHART_VERSION=v1.0.2 + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --version $IGW_CHART_VERSION \ + oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool + ``` \ No newline at end of file diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index 13fb830b8..405df45b9 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -193,6 +193,72 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl get httproute llm-route -o yaml ``` +=== "Nginx Gateway Fabric" + + Nginx Gateway Fabric is an implementation of the Gateway API that supports the Inference Extension. Follow these steps to deploy an Inference Gateway using NGF. + + 1. Requirements + + - Gateway API [CRDs](https://gateway-api.sigs.k8s.io/guides/#installing-gateway-api) installed (Standard or Experimental channel). + - [Helm](https://helm.sh/docs/intro/install/) installed. + - A Kubernetes cluster with LoadBalancer or NodePort access. + + 2. Install the Inference Extension CRDs + + ```bash + kubectl kustomize "https://github.com/nginx/nginx-gateway-fabric/config/crd/inference-extension/?ref=v2.2.0" | kubectl apply -f - + ``` + + 3. 
Install NGINX Gateway Fabric with the Inference Extension enabled by setting the nginxGateway.gwAPIInferenceExtension.enable=true Helm value + + ```bash + helm repo add nginx-stable https://helm.nginx.com/stable + helm upgrade -i nginx-gateway-fabric nginx-stable/nginx-gateway-fabric \ + --namespace nginx-gateway --create-namespace \ + --set nginxGateway.gwAPIInferenceExtension.enable=true + ``` + This enables NGF to recognize and manage Inference Extension resources such as InferencePool and InferenceObjective. + + 4. Deploy the Gateway + + ```bash + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/gateway.yaml + ``` + + Verify that the Gateway was successfully provisioned and shows Programmed=True: + + ```bash + kubectl describe gateway inference-gateway + ``` + + 5. Verify the Gateway status + + Confirm that the Gateway is running and has been assigned an address: + + ```bash + kubectl get gateway inference-gateway + ``` + + 6. Deploy the HTTPRoute + + Create the HTTPRoute resource to route traffic to your InferencePool: + + ```bash + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/httproute.yaml + ``` + + 7. Verify the route status + + Check that the HTTPRoute was successfully configured and references were resolved: + + ```bash + kubectl get httproute llm-route -o yaml + ``` + + The route status should include Accepted=True and ResolvedRefs=True. + + For more information, see the [NGF - Inference Gateway Setup guide](https://docs.nginx.com/nginx-gateway-fabric/how-to/gateway-api-inference-extension/#overview) + ### Deploy InferenceObjective (Optional) Deploy the sample InferenceObjective which allows you to specify priority of requests. 
@@ -285,3 +351,43 @@ Deploy the sample InferenceObjective which allows you to specify priority of req ```bash kubectl delete ns kgateway-system ``` + +=== "Nginx Gateway Fabric" + + Follow these steps to remove the NGINX Gateway Fabric (NGF) Inference Gateway and all related resources. + + 1. Remove Inference resources InferencePool, InferenceObjective, and model server resources: + + ```bash + helm uninstall vllm-llama3-8b-instruct + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferenceobjective.yaml --ignore-not-found + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/sim-deployment.yaml --ignore-not-found + ``` + + 2. Delete Gateway API Inference Extension CRDs: + + ```bash + kubectl delete -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd --ignore-not-found + ``` + + 3. Remove Inference Gateway and HTTPRoute: + + ```bash + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/gateway.yaml --ignore-not-found + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/httproute.yaml --ignore-not-found + ``` + + 4. Uninstall NGINX Gateway Fabric: + + ```bash + helm uninstall ngf -n nginx-gateway + ``` + + 5. 
Clean up namespace and CRDs: + + ```bash + kubectl delete ns nginx-gateway + kubectl delete -f https://raw.githubusercontent.com/nginx/nginx-gateway-fabric/v2.2.0/deploy/crds.yaml + ``` \ No newline at end of file diff --git a/site-src/implementations/gateways.md b/site-src/implementations/gateways.md index 8c7ee8dea..0f04b8c0e 100644 --- a/site-src/implementations/gateways.md +++ b/site-src/implementations/gateways.md @@ -9,6 +9,7 @@ This project has several implementations that are planned or in progress: - [Istio](#istio) - [Kgateway](#kgateway) - [Kubvernor](#kubvernor) + - [Nginx Gateway Fabric](#nginx-gateway-fabric) [1]:#alibaba-cloud-container-service-for-kubernetes [2]:#envoy-ai-gateway @@ -16,6 +17,7 @@ This project has several implementations that are planned or in progress: [4]:#istio [5]:#kgateway [6]:#kubvernor +[7]:#nginx-gateway-fabric Agentgateway can run independently or can be managed by [Kgateway](https://kgateway.dev/). @@ -98,3 +100,10 @@ Kgateway supports Inference Gateway with the [agentgateway](https://agentgateway [krg]:https://github.com/kubvernor/kubvernor [krgu]: https://github.com/kubvernor/kubvernor/blob/main/README.md +## Nginx Gateway Fabric + +[NGINX Gateway Fabric][nginx-gateway-fabric] is an open-source project that provides an implementation of the Gateway API using [NGINX][nginx] as the data plane. The goal of this project is to implement the core Gateway API to configure an HTTP or TCP/UDP load balancer, reverse-proxy, or API gateway for applications running on Kubernetes. You can find the comprehensive NGINX Gateway Fabric user documentation on the [NGINX Documentation][nginx-docs] website. 
+ +[nginx-gateway-fabric]: https://github.com/nginx/nginx-gateway-fabric +[nginx]:https://nginx.org/ +[nginx-docs]:https://docs.nginx.com/nginx-gateway-fabric/ \ No newline at end of file From c694cf8a322764791b4f25374c7dedc454b39328 Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Mon, 3 Nov 2025 10:19:25 -0800 Subject: [PATCH 2/9] docs: Addressing comments --- site-src/_includes/epp-latest.md | 2 +- site-src/guides/getting-started-latest.md | 12 ++++++------ site-src/implementations/gateways.md | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md index b194f3afe..cd6983cb0 100644 --- a/site-src/_includes/epp-latest.md +++ b/site-src/_includes/epp-latest.md @@ -31,7 +31,7 @@ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool ``` -=== "Nginx Gateway Fabric" +=== "NGINX Gateway Fabric" ```bash export IGW_CHART_VERSION=v1.0.2 diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index 405df45b9..253132594 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -193,9 +193,9 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl get httproute llm-route -o yaml ``` -=== "Nginx Gateway Fabric" +=== "NGINX Gateway Fabric" - Nginx Gateway Fabric is an implementation of the Gateway API that supports the Inference Extension. Follow these steps to deploy an Inference Gateway using NGF. + NGINX Gateway Fabric is an implementation of the Gateway API that supports the Inference Extension. Follow these steps to deploy an Inference Gateway using NGINX Gateway Fabric. 1. 
Requirements @@ -217,7 +217,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens --namespace nginx-gateway --create-namespace \ --set nginxGateway.gwAPIInferenceExtension.enable=true ``` - This enables NGF to recognize and manage Inference Extension resources such as InferencePool and InferenceObjective. + This enables NGINX Gateway Fabric to recognize and manage Inference Extension resources such as InferencePool and InferenceObjective. 4. Deploy the Gateway @@ -257,7 +257,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens The route status should include Accepted=True and ResolvedRefs=True. - For more information, see the [NGF - Inference Gateway Setup guide](https://docs.nginx.com/nginx-gateway-fabric/how-to/gateway-api-inference-extension/#overview) + For more information, see the [NGINX Gateway Fabric - Inference Gateway Setup guide](https://docs.nginx.com/nginx-gateway-fabric/how-to/gateway-api-inference-extension/#overview) ### Deploy InferenceObjective (Optional) @@ -352,9 +352,9 @@ Deploy the sample InferenceObjective which allows you to specify priority of req kubectl delete ns kgateway-system ``` -=== "Nginx Gateway Fabric" +=== "NGINX Gateway Fabric" - Follow these steps to remove the NGINX Gateway Fabric (NGF) Inference Gateway and all related resources. + Follow these steps to remove the NGINX Gateway Fabric Inference Gateway and all related resources. 1. 
Remove Inference resources InferencePool, InferenceObjective, and model server resources: diff --git a/site-src/implementations/gateways.md b/site-src/implementations/gateways.md index 0f04b8c0e..7ea67f1a3 100644 --- a/site-src/implementations/gateways.md +++ b/site-src/implementations/gateways.md @@ -9,7 +9,7 @@ This project has several implementations that are planned or in progress: - [Istio](#istio) - [Kgateway](#kgateway) - [Kubvernor](#kubvernor) - - [Nginx Gateway Fabric](#nginx-gateway-fabric) + - [NGINX Gateway Fabric](#nginx-gateway-fabric) [1]:#alibaba-cloud-container-service-for-kubernetes [2]:#envoy-ai-gateway @@ -100,7 +100,7 @@ Kgateway supports Inference Gateway with the [agentgateway](https://agentgateway [krg]:https://github.com/kubvernor/kubvernor [krgu]: https://github.com/kubvernor/kubvernor/blob/main/README.md -## Nginx Gateway Fabric +## NGINX Gateway Fabric [NGINX Gateway Fabric][nginx-gateway-fabric] is an open-source project that provides an implementation of the Gateway API using [NGINX][nginx] as the data plane. The goal of this project is to implement the core Gateway API to configure an HTTP or TCP/UDP load balancer, reverse-proxy, or API gateway for applications running on Kubernetes. You can find the comprehensive NGINX Gateway Fabric user documentation on the [NGINX Documentation][nginx-docs] website. 
From 1bca5c6cdd226e7a8505a49c7cd412c895d3df0e Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Tue, 4 Nov 2025 13:06:18 -0800 Subject: [PATCH 3/9] docs: Addressed new set of comments --- .../gateway/nginxgatewayfabric/httproute.yaml | 3 +- site-src/_includes/epp-latest.md | 1 - site-src/guides/getting-started-latest.md | 45 ++++++++----------- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/config/manifests/gateway/nginxgatewayfabric/httproute.yaml b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml index 01c30ac5f..0cefa46bd 100644 --- a/config/manifests/gateway/nginxgatewayfabric/httproute.yaml +++ b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml @@ -14,4 +14,5 @@ rules: backendRefs: - group: inference.networking.k8s.io kind: InferencePool - name: vllm-llama3-8b-instruct \ No newline at end of file + name: vllm-llama3-8b-instruct + diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md index cd6983cb0..351af5ae7 100644 --- a/site-src/_includes/epp-latest.md +++ b/site-src/_includes/epp-latest.md @@ -34,7 +34,6 @@ === "NGINX Gateway Fabric" ```bash - export IGW_CHART_VERSION=v1.0.2 helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --version $IGW_CHART_VERSION \ diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index 253132594..8a5450793 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -209,7 +209,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl kustomize "https://github.com/nginx/nginx-gateway-fabric/config/crd/inference-extension/?ref=v2.2.0" | kubectl apply -f - ``` - 3. Install NGINX Gateway Fabric with the Inference Extension enabled by setting the nginxGateway.gwAPIInferenceExtension.enable=true Helm value + 3. 
Install NGINX Gateway Fabric with the Inference Extension enabled by setting the `nginxGateway.gwAPIInferenceExtension.enable=true` Helm value ```bash helm repo add nginx-stable https://helm.nginx.com/stable @@ -217,7 +217,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens --namespace nginx-gateway --create-namespace \ --set nginxGateway.gwAPIInferenceExtension.enable=true ``` - This enables NGINX Gateway Fabric to recognize and manage Inference Extension resources such as InferencePool and InferenceObjective. + This enables NGINX Gateway Fabric to watch and manage Inference Extension resources such as InferencePool and InferenceObjective. 4. Deploy the Gateway @@ -225,19 +225,15 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/gateway.yaml ``` - Verify that the Gateway was successfully provisioned and shows Programmed=True: - - ```bash - kubectl describe gateway inference-gateway - ``` - 5. Verify the Gateway status - Confirm that the Gateway is running and has been assigned an address: + Ensure that the Gateway is running and has been assigned an address: ```bash kubectl get gateway inference-gateway ``` + + Check that the Gateway has been successfully provisioned and that its status shows Programmed=True 6. Deploy the HTTPRoute @@ -256,6 +252,16 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens ``` The route status should include Accepted=True and ResolvedRefs=True. + + 8. Verify the InferencePool Status + + Make sure the InferencePool is active before sending traffic. + + ```bash + kubectl describe inferencepools.inference.networking.k8s.io vllm-llama3-8b-instruct + ``` + + Check that the status shows Accepted=True and ResolvedRefs=True. This confirms the InferencePool is ready to handle traffic. 
For more information, see the [NGINX Gateway Fabric - Inference Gateway Setup guide](https://docs.nginx.com/nginx-gateway-fabric/how-to/gateway-api-inference-extension/#overview) @@ -356,36 +362,21 @@ Deploy the sample InferenceObjective which allows you to specify priority of req Follow these steps to remove the NGINX Gateway Fabric Inference Gateway and all related resources. - 1. Remove Inference resources InferencePool, InferenceObjective, and model server resources: - - ```bash - helm uninstall vllm-llama3-8b-instruct - kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferenceobjective.yaml --ignore-not-found - kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found - kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found - kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/sim-deployment.yaml --ignore-not-found - ``` - - 2. Delete Gateway API Inference Extension CRDs: - - ```bash - kubectl delete -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd --ignore-not-found - ``` - 3. Remove Inference Gateway and HTTPRoute: + 1. Remove Inference Gateway and HTTPRoute: ```bash kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/gateway.yaml --ignore-not-found kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/httproute.yaml --ignore-not-found ``` - 4. Uninstall NGINX Gateway Fabric: + 2. Uninstall NGINX Gateway Fabric: ```bash helm uninstall ngf -n nginx-gateway ``` - 5. Clean up namespace and CRDs: + 3. 
Clean up namespace and CRDs: ```bash kubectl delete ns nginx-gateway From b0fbcc8cb5d853741109db68dc6af6d2dc78e757 Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Tue, 4 Nov 2025 14:15:42 -0800 Subject: [PATCH 4/9] docs: Fixed the helm command --- site-src/_includes/epp-latest.md | 4 +++- site-src/guides/getting-started-latest.md | 23 +++++++---------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md index 351af5ae7..e090778f0 100644 --- a/site-src/_includes/epp-latest.md +++ b/site-src/_includes/epp-latest.md @@ -34,8 +34,10 @@ === "NGINX Gateway Fabric" ```bash + export GATEWAY_PROVIDER=none helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ - oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool + oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool ``` \ No newline at end of file diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index 8a5450793..1d10101c9 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -203,29 +203,20 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens - [Helm](https://helm.sh/docs/intro/install/) installed. - A Kubernetes cluster with LoadBalancer or NodePort access. - 2. Install the Inference Extension CRDs - - ```bash - kubectl kustomize "https://github.com/nginx/nginx-gateway-fabric/config/crd/inference-extension/?ref=v2.2.0" | kubectl apply -f - - ``` - - 3. Install NGINX Gateway Fabric with the Inference Extension enabled by setting the `nginxGateway.gwAPIInferenceExtension.enable=true` Helm value + 2. 
Install NGINX Gateway Fabric with the Inference Extension enabled by setting the `nginxGateway.gwAPIInferenceExtension.enable=true` Helm value ```bash - helm repo add nginx-stable https://helm.nginx.com/stable - helm upgrade -i nginx-gateway-fabric nginx-stable/nginx-gateway-fabric \ - --namespace nginx-gateway --create-namespace \ - --set nginxGateway.gwAPIInferenceExtension.enable=true + helm install ngf oci://ghcr.io/nginx/charts/nginx-gateway-fabric --create-namespace -n nginx-gateway --set nginxGateway.gwAPIInferenceExtension.enable=true ``` This enables NGINX Gateway Fabric to watch and manage Inference Extension resources such as InferencePool and InferenceObjective. - 4. Deploy the Gateway + 3. Deploy the Gateway ```bash kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/gateway.yaml ``` - 5. Verify the Gateway status + 4. Verify the Gateway status Ensure that the Gateway is running and has been assigned an address: @@ -235,7 +226,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens Check that the Gateway has been successfully provisioned and that its status shows Programmed=True - 6. Deploy the HTTPRoute + 5. Deploy the HTTPRoute Create the HTTPRoute resource to route traffic to your InferencePool: @@ -243,7 +234,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/httproute.yaml ``` - 7. Verify the route status + 6. Verify the route status Check that the HTTPRoute was successfully configured and references were resolved: @@ -253,7 +244,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens The route status should include Accepted=True and ResolvedRefs=True. - 8. Verify the InferencePool Status + 7. 
Verify the InferencePool status Make sure the InferencePool is active before sending traffic. From 308bf89fc545171c788c7d01dbb153bfb5400b75 Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Tue, 4 Nov 2025 14:33:12 -0800 Subject: [PATCH 5/9] docs: Fixed cleanup command --- site-src/guides/getting-started-latest.md | 1 - 1 file changed, 1 deletion(-) diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index 1d10101c9..42bfd7743 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -371,5 +371,4 @@ Deploy the sample InferenceObjective which allows you to specify priority of req ```bash kubectl delete ns nginx-gateway - kubectl delete -f https://raw.githubusercontent.com/nginx/nginx-gateway-fabric/v2.2.0/deploy/crds.yaml ``` \ No newline at end of file From d0e33d6a84eeb39db709ae8078b320c22b1d18d0 Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Tue, 4 Nov 2025 14:35:45 -0800 Subject: [PATCH 6/9] docs: Fixed cleanup command --- site-src/guides/getting-started-latest.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index 42bfd7743..31cd12d6b 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -367,7 +367,7 @@ Deploy the sample InferenceObjective which allows you to specify priority of req helm uninstall ngf -n nginx-gateway ``` - 3. Clean up namespace and CRDs: + 3. 
Clean up namespace: ```bash kubectl delete ns nginx-gateway From 38885309bcdd034ed87190ad3848eeeb1cc933b3 Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Tue, 4 Nov 2025 15:06:43 -0800 Subject: [PATCH 7/9] docs: Adding released version --- site-src/_includes/epp.md | 11 +++++ site-src/guides/index.md | 91 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) diff --git a/site-src/_includes/epp.md b/site-src/_includes/epp.md index 73e24786f..a82ca68d3 100644 --- a/site-src/_includes/epp.md +++ b/site-src/_includes/epp.md @@ -30,3 +30,14 @@ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` + +=== "NGINX Gateway Fabric" + + ```bash + export GATEWAY_PROVIDER=none + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ + --version $IGW_CHART_VERSION \ + oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool + ``` diff --git a/site-src/guides/index.md b/site-src/guides/index.md index 0dab6ba76..cf4980e9e 100644 --- a/site-src/guides/index.md +++ b/site-src/guides/index.md @@ -87,6 +87,22 @@ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extens helm upgrade -i --namespace kgateway-system --version $KGTW_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true ``` +=== "NGINX Gateway Fabric" + + 1. Requirements + + - Gateway API [CRDs](https://gateway-api.sigs.k8s.io/guides/#installing-gateway-api) installed (Standard or Experimental channel). + - [Helm](https://helm.sh/docs/intro/install/) installed. + - A Kubernetes cluster with LoadBalancer or NodePort access. + + 2. 
Install NGINX Gateway Fabric with the Inference Extension enabled by setting the `nginxGateway.gwAPIInferenceExtension.enable=true` Helm value + + ```bash + helm install ngf oci://ghcr.io/nginx/charts/nginx-gateway-fabric --create-namespace -n nginx-gateway --set nginxGateway.gwAPIInferenceExtension.enable=true + ``` + This enables NGINX Gateway Fabric to watch and manage Inference Extension resources such as InferencePool and InferenceObjective. + + ### Deploy the InferencePool and Endpoint Picker Extension Install an InferencePool named `vllm-llama3-8b-instruct` that selects from endpoints with label `app: vllm-llama3-8b-instruct` and listening on port 8000. The Helm install command automatically installs the endpoint-picker, InferencePool along with provider specific resources. @@ -200,6 +216,57 @@ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl get httproute llm-route -o yaml ``` +=== "NGINX Gateway Fabric" + + NGINX Gateway Fabric is an implementation of the Gateway API that supports the Inference Extension. Follow these steps to deploy an Inference Gateway using NGINX Gateway Fabric. + + 1. Deploy the Gateway + + ```bash + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/gateway.yaml + ``` + + 2. Verify the Gateway status + + Ensure that the Gateway is running and has been assigned an address: + + ```bash + kubectl get gateway inference-gateway + ``` + + Check that the Gateway has been successfully provisioned and that its status shows Programmed=True. + + 3. Deploy the HTTPRoute + + Create the HTTPRoute resource to route traffic to your InferencePool: + + ```bash + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/httproute.yaml + ``` + + 4. 
Verify the route status + + Check that the HTTPRoute was successfully configured and references were resolved: + + ```bash + kubectl get httproute llm-route -o yaml + ``` + + The route status should include Accepted=True and ResolvedRefs=True. + + 5. Verify the InferencePool status + + Make sure the InferencePool is active before sending traffic. + + ```bash + kubectl describe inferencepools.inference.networking.k8s.io vllm-llama3-8b-instruct + ``` + + Check that the status shows Accepted=True and ResolvedRefs=True. This confirms the InferencePool is ready to handle traffic. + + For more information, see the [NGINX Gateway Fabric - Inference Gateway Setup guide](https://docs.nginx.com/nginx-gateway-fabric/how-to/gateway-api-inference-extension/#overview). + + ### Deploy InferenceObjective (Optional) Deploy the sample InferenceObjective which allows you to specify priority of requests. @@ -293,3 +360,27 @@ Deploy the sample InferenceObjective which allows you to specify priority of req ```bash kubectl delete ns kgateway-system ``` + +=== "NGINX Gateway Fabric" + + Follow these steps to remove the NGINX Gateway Fabric Inference Gateway and all related resources. + + + 1. Remove Inference Gateway and HTTPRoute: + + ```bash + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/gateway.yaml --ignore-not-found + kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/nginxgatewayfabric/httproute.yaml --ignore-not-found + ``` + + 2. Uninstall NGINX Gateway Fabric: + + ```bash + helm uninstall ngf -n nginx-gateway + ``` + + 3. 
Clean up namespace: + + ```bash + kubectl delete ns nginx-gateway + ``` From d3dceff26cfe6bec4af114ef1627411cca226666 Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Mon, 10 Nov 2025 08:42:33 -0800 Subject: [PATCH 8/9] docs: Fixing the YAML files for test failure --- .../gateway/nginxgatewayfabric/gateway.yaml | 8 ++++---- .../gateway/nginxgatewayfabric/httproute.yaml | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/config/manifests/gateway/nginxgatewayfabric/gateway.yaml b/config/manifests/gateway/nginxgatewayfabric/gateway.yaml index d70ec98a3..cdc1308ea 100644 --- a/config/manifests/gateway/nginxgatewayfabric/gateway.yaml +++ b/config/manifests/gateway/nginxgatewayfabric/gateway.yaml @@ -4,7 +4,7 @@ metadata: name: inference-gateway spec: gatewayClassName: nginx -listeners: -- name: http - port: 80 - protocol: HTTP + listeners: + - name: http + port: 80 + protocol: HTTP diff --git a/config/manifests/gateway/nginxgatewayfabric/httproute.yaml b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml index 0cefa46bd..586d4ca8d 100644 --- a/config/manifests/gateway/nginxgatewayfabric/httproute.yaml +++ b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml @@ -4,15 +4,15 @@ metadata: name: llm-route namespace: default spec: -parentRefs: -- name: inference-gateway -rules: -- matches: - - path: - type: PathPrefix - value: / - backendRefs: - - group: inference.networking.k8s.io + parentRefs: + - name: inference-gateway + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct From 8e0382ecbdd66ba432677f472e474bb2591b5c34 Mon Sep 17 00:00:00 2001 From: Sindhu Shiv Date: Mon, 10 Nov 2025 10:31:39 -0800 Subject: [PATCH 9/9] docs: Fixing the HTTP YAML file for test failure --- config/manifests/gateway/nginxgatewayfabric/httproute.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/config/manifests/gateway/nginxgatewayfabric/httproute.yaml b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml index 586d4ca8d..70c38873f 100644 --- a/config/manifests/gateway/nginxgatewayfabric/httproute.yaml +++ b/config/manifests/gateway/nginxgatewayfabric/httproute.yaml @@ -1,8 +1,8 @@ apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute metadata: -name: llm-route -namespace: default + name: llm-route + namespace: default spec: parentRefs: - name: inference-gateway