diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inferencepool/templates/_helpers.tpl
index fdc9b1a2b..676f96775 100644
--- a/config/charts/inferencepool/templates/_helpers.tpl
+++ b/config/charts/inferencepool/templates/_helpers.tpl
@@ -31,3 +31,17 @@ Selector labels
 {{- define "gateway-api-inference-extension.selectorLabels" -}}
 inferencepool: {{ include "gateway-api-inference-extension.name" . }}
 {{- end -}}
+
+{{/*
+Envoy Common labels
+*/}}
+{{- define "gateway-api-inference-extension.envoy-labels" -}}
+app.kubernetes.io/name: {{ include "gateway-api-inference-extension.name" . }}-envoy
+{{- end }}
+
+{{/*
+Envoy Selector labels
+*/}}
+{{- define "gateway-api-inference-extension.envoy-selectorLabels" -}}
+envoy: {{ include "gateway-api-inference-extension.name" . }}-envoy
+{{- end }}
diff --git a/config/charts/inferencepool/templates/envoy-service.yaml b/config/charts/inferencepool/templates/envoy-service.yaml
new file mode 100644
index 000000000..d8c2aafda
--- /dev/null
+++ b/config/charts/inferencepool/templates/envoy-service.yaml
@@ -0,0 +1,19 @@
+{{- if eq (lower .Values.provider.name) "standalone" }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}-envoy
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.envoy-labels" . | nindent 4 }}
+spec:
+  selector:
+    {{- include "gateway-api-inference-extension.envoy-selectorLabels" . | nindent 4 }}
+  ports:
+  - name: http
+    port: {{ .Values.provider.standalone.envoy.servicePort | default 8081 }}
+    protocol: TCP
+    targetPort: 8081
+  type: ClusterIP
+{{- end }}
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
index de892337d..d3177f552 100644
--- a/config/charts/inferencepool/templates/epp-deployment.yaml
+++ b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -1,3 +1,5 @@
+{{- if ne (lower .Values.provider.name) "standalone" }}
+
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -151,3 +153,4 @@ spec:
       tolerations: {{- toYaml .Values.inferenceExtension.tolerations | nindent 8 }}
 {{- end }}
+{{- end }}
diff --git a/config/charts/inferencepool/templates/epp-standalone-deployment.yaml b/config/charts/inferencepool/templates/epp-standalone-deployment.yaml
new file mode 100644
index 000000000..559fb7219
--- /dev/null
+++ b/config/charts/inferencepool/templates/epp-standalone-deployment.yaml
@@ -0,0 +1,410 @@
+{{- if eq (lower .Values.provider.name) "standalone" }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}-envoy
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{ include "gateway-api-inference-extension.envoy-labels" . 
| nindent 4 }} +data: + envoy.yaml: | + admin: + address: + socket_address: + address: 127.0.0.1 + port_value: 19000 + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/null + static_resources: + listeners: + - name: envoy-proxy-ready-0.0.0.0-19001 + address: + socket_address: + address: 0.0.0.0 + port_value: 19001 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: envoy-ready-http + route_config: + name: local_route + virtual_hosts: + - name: prometheus_stats + domains: ["*"] + routes: + - match: + prefix: "/stats/prometheus" + route: + cluster: "prometheus_stats" + http_filters: + - name: envoy.filters.http.health_check + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.health_check.v3.HealthCheck + pass_through_mode: false + headers: + - name: ":path" + string_match: + exact: "/ready" + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + - name: vllm + address: + socket_address: + address: 0.0.0.0 + port_value: 8081 + per_connection_buffer_limit_bytes: 32768 + access_log: + - name: envoy.access_loggers.file + filter: + response_flag_filter: + flags: ["NR"] + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/stdout + log_format: + text_format_source: + inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n" + filter_chains: + - name: vllm + filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: http-8081 + route_config: + name: vllm 
+ virtual_hosts: + - name: vllm-default + domains: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: original_destination_cluster + timeout: 86400s + idle_timeout: 86400s + upgrade_configs: + - upgrade_type: websocket + typed_per_filter_config: + envoy.filters.http.ext_proc: + "@type": type.googleapis.com/envoy.config.route.v3.FilterConfig + config: {} + http_filters: + - name: envoy.filters.http.ext_proc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + grpc_service: + envoy_grpc: + cluster_name: ext_proc + authority: localhost:9002 + timeout: 10s + processing_mode: + request_header_mode: SEND + response_header_mode: SEND + request_body_mode: FULL_DUPLEX_STREAMED + response_body_mode: FULL_DUPLEX_STREAMED + request_trailer_mode: SEND + response_trailer_mode: SEND + message_timeout: 1000s + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + suppress_envoy_headers: true + http2_protocol_options: + max_concurrent_streams: 100 + initial_stream_window_size: 65536 + initial_connection_window_size: 1048576 + use_remote_address: true + normalize_path: true + merge_slashes: true + server_header_transformation: PASS_THROUGH + common_http_protocol_options: + headers_with_underscores_action: REJECT_REQUEST + path_with_escaped_slashes_action: UNESCAPE_AND_REDIRECT + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/stdout + log_format: + text_format_source: + inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n" + clusters: + - name: prometheus_stats + type: STATIC + connect_timeout: 0.250s + load_assignment: + cluster_name: prometheus_stats + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 19000 + - name: 
original_destination_cluster + type: ORIGINAL_DST + connect_timeout: 1000s + lb_policy: CLUSTER_PROVIDED + circuit_breakers: + thresholds: + - max_connections: 40000 + max_pending_requests: 40000 + max_requests: 40000 + original_dst_lb_config: + use_http_header: true + http_header_name: x-gateway-destination-endpoint + - name: ext_proc + type: STATIC + connect_timeout: 86400s + lb_policy: LEAST_REQUEST + circuit_breakers: + thresholds: + - max_connections: 40000 + max_pending_requests: 40000 + max_requests: 40000 + max_retries: 1024 + health_checks: + - timeout: 2s + interval: 10s + unhealthy_threshold: 3 + healthy_threshold: 2 + reuse_connection: true + grpc_health_check: + service_name: "envoy.service.ext_proc.v3.ExternalProcessor" + tls_options: + alpn_protocols: ["h2"] + transport_socket: + name: "envoy.transport_sockets.tls" + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + common_tls_context: + validation_context: + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: + initial_stream_window_size: 65536 + initial_connection_window_size: 1048576 + load_assignment: + cluster_name: ext_proc + endpoints: + - locality: + region: ext_proc/e2e/0 + lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 9002 + load_balancing_weight: 1 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "gateway-api-inference-extension.name" . }}-envoy + namespace: {{ .Release.Namespace }} + labels: {{ include "gateway-api-inference-extension.envoy-labels" . | nindent 4 }} +spec: + replicas: {{ .Values.provider.standalone.envoy.replicas | default 1 }} + selector: + matchLabels: + {{ include "gateway-api-inference-extension.envoy-selectorLabels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "gateway-api-inference-extension.envoy-selectorLabels" . | nindent 8 }} + annotations: + prometheus.io/path: /stats/prometheus + prometheus.io/port: "19001" # This still correctly refers to the envoy container's metrics port + prometheus.io/scrape: "true" + spec: + serviceAccountName: {{ include "gateway-api-inference-extension.name" . }} + terminationGracePeriodSeconds: 130 + containers: + - name: envoy + image: {{ .Values.provider.standalone.envoy.image.hub }}/{{ .Values.provider.standalone.envoy.image.name }}:{{ .Values.provider.standalone.envoy.image.tag }} + imagePullPolicy: {{ .Values.provider.standalone.envoy.image.pullPolicy | default "Always" }} + args: + - "--service-cluster" + - "{{.Release.Namespace}}/inference-gateway" + - "--service-node" + - "envoy" + - "--log-level" + - "trace" + - "--cpuset-threads" + - "--drain-strategy" + - "immediate" + - "--drain-time-s" + - "60" + - "-c" + - "/etc/envoy/envoy.yaml" + command: + - envoy + env: + - name: ENVOY_NS_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: ENVOY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + ports: + - containerPort: 8081 + name: http-8081 + - containerPort: 19001 + name: metrics # <-- Envoy's metrics port + readinessProbe: + failureThreshold: 1 + httpGet: + path: /ready + port: 19001 + scheme: HTTP + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + requests: + cpu: 100m + memory: 512Mi + volumeMounts: + - name: envoy-config-volume + mountPath: /etc/envoy + readOnly: true + - name: epp + image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }} + imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }} + args: + - --pool-name + - {{ .Release.Name }} + - --pool-namespace + - {{ .Release.Namespace }} + {{- if ne .Values.inferencePool.apiVersion 
"inference.networking.k8s.io" }} + - --pool-group + - "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}" + {{- end }} + - --zap-encoder + - "json" + - --config-file + - "/config/{{ .Values.inferenceExtension.pluginsConfigFile }}" + {{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }} + - --total-queued-requests-metric + - "nv_trt_llm_request_metrics{request_type=waiting}" + - --kv-cache-usage-percentage-metric + - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}" + - --lora-info-metric + - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet. + {{- end }} + {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} + - --ha-enable-leader-election + {{- end }} + # Pass additional flags via the inferenceExtension.flags field in values.yaml. + {{- range .Values.inferenceExtension.flags }} + - "--{{ .name }}" + - "{{ .value }}" + {{- end }} + {{- if .Values.inferenceExtension.tracing.enabled }} + - --tracing=true + {{- else }} + - --tracing=false + {{- end }} + {{- if not .Values.inferenceExtension.monitoring.prometheus.enabled }} + - --metrics-endpoint-auth=false + {{- end }} + ports: + - name: grpc + containerPort: 9002 + - name: grpc-health + containerPort: 9003 + - name: metrics + containerPort: 9090 + {{- if .Values.inferenceExtension.extraContainerPorts }} + {{- toYaml .Values.inferenceExtension.extraContainerPorts | nindent 8 }} + {{- end }} + livenessProbe: + {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} + grpc: + port: 9003 + service: liveness + {{- else }} + grpc: + port: 9003 + service: inference-extension + {{- end }} + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} + grpc: + port: 9003 + service: readiness + {{- else }} + grpc: + port: 9003 + service: inference-extension + {{- end }} + periodSeconds: 2 + + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: 
metadata.namespace + {{- if .Values.inferenceExtension.tracing.enabled }} + - name: OTEL_SERVICE_NAME + value: "gateway-api-inference-extension" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.inferenceExtension.tracing.otelExporterEndpoint | quote }} + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: OTEL_RESOURCE_ATTRIBUTES + value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)' + - name: OTEL_TRACES_SAMPLER + value: {{ .Values.inferenceExtension.tracing.sampling.sampler | quote }} + - name: OTEL_TRACES_SAMPLER_ARG + value: {{ .Values.inferenceExtension.tracing.sampling.samplerArg | quote }} + {{- end }} + {{- if .Values.inferenceExtension.env }} + {{- toYaml .Values.inferenceExtension.env | nindent 8 }} + {{- end }} + volumeMounts: + - name: plugins-config-volume + mountPath: "/config" + volumes: + - name: envoy-config-volume + configMap: + name: {{ include "gateway-api-inference-extension.name" . }}-envoy + items: + - key: envoy.yaml + path: envoy.yaml + - name: plugins-config-volume + configMap: + name: {{ include "gateway-api-inference-extension.name" . 
}} + {{- if .Values.inferenceExtension.affinity }} + affinity: + {{- toYaml .Values.inferenceExtension.affinity | nindent 8 }} + {{- end }} + {{- if .Values.inferenceExtension.tolerations }} + tolerations: + {{- toYaml .Values.inferenceExtension.tolerations | nindent 8 }} + {{- end }} +--- +{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 8b3385ab1..c2ad0ff96 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -76,16 +76,26 @@ inferencePool: # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now. targetPortNumber: 8000 -# Options: ["gke", "istio", "none"] +# Options: ["gke", "istio", "standalone", "none"] provider: name: none - # GKE-specific configuration. # This block is only used if name is "gke". gke: # Set to true if the cluster is an Autopilot cluster. autopilot: false + standalone: + replicas: 1 + envoy: + image: + name: envoy + hub: docker.io/envoyproxy + tag: distroless-v1.33.2 + pullPolicy: Always + servicePort: 8081 + + istio: destinationRule: # Provide a way to override the default calculated host @@ -94,4 +104,6 @@ istio: trafficPolicy: {} # connectionPool: # http: - # maxRequestsPerConnection: 256000 \ No newline at end of file + # maxRequestsPerConnection: 256000 + + diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md index ef08a61be..9ccfe0e93 100644 --- a/site-src/_includes/epp-latest.md +++ b/site-src/_includes/epp-latest.md @@ -30,3 +30,14 @@ --version $IGW_CHART_VERSION \ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool ``` +=== "Standalone EPP" + ```bash + export GATEWAY_PROVIDER=none + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ + --set provider.standalone=true \ + --version 
$IGW_CHART_VERSION \ + oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool + ``` + diff --git a/site-src/_includes/epp.md b/site-src/_includes/epp.md index 73e24786f..4872b910f 100644 --- a/site-src/_includes/epp.md +++ b/site-src/_includes/epp.md @@ -30,3 +30,14 @@ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` +=== "Standalone EPP" + + ```bash + export GATEWAY_PROVIDER=none + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ + --set provider.standalone=true \ + --version $IGW_CHART_VERSION \ + oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool + ``` \ No newline at end of file diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index bf7413b8e..e605da71a 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -43,7 +43,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd ``` -### Install the Gateway +### Install the Gateway if you are not using Standalone EPP Choose one of the following options to install Gateway. @@ -91,7 +91,9 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens ```bash helm upgrade -i --namespace kgateway-system --version $KGTW_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true ``` - +=== "Standalone EPP" + Nothing to install here as you don't need a gateway + ### Deploy the InferencePool and Endpoint Picker Extension Install an InferencePool named `vllm-llama3-8b-instruct` that selects from endpoints with label `app: vllm-llama3-8b-instruct` and listening on port 8000. 
The Helm install command automatically installs the endpoint-picker, InferencePool along with provider specific resources. @@ -104,7 +106,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens --8<-- "site-src/_includes/epp-latest.md" -### Deploy an Inference Gateway +### Deploy an Inference Gateway if not using Standalone EPP Choose one of the following options to deploy an Inference Gateway. @@ -199,6 +201,10 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl get httproute llm-route -o yaml ``` +=== "Standalone EPP" + + Nothing is needed. + ### Deploy InferenceObjective (Optional) Deploy the sample InferenceObjective which allows you to specify priority of requests. @@ -290,3 +296,6 @@ Deploy the sample InferenceObjective which allows you to specify priority of req ```bash kubectl delete ns kgateway-system ``` +=== "Standalone EPP" + + N/A \ No newline at end of file