From 49c9056ea7d0755247b668548892aeae2d0bf73b Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Thu, 6 Nov 2025 09:34:35 +0200
Subject: [PATCH 1/9] Init commit

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/extension/features.py         | 1 -
 vllm_gaudi/extension/runtime.py          | 2 ++
 vllm_gaudi/v1/worker/hpu_model_runner.py | 8 ++++++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/vllm_gaudi/extension/features.py b/vllm_gaudi/extension/features.py
index 9112ab235..de392d4a9 100644
--- a/vllm_gaudi/extension/features.py
+++ b/vllm_gaudi/extension/features.py
@@ -12,7 +12,6 @@ def get_user_flags():
     flags = [
-        Env('VLLM_USE_V1', boolean),
         Env('VLLM_ENABLE_EXPERIMENTAL_FLAGS', boolean),
         Env('VLLM_EXPONENTIAL_BUCKETING', boolean),
         Env('VLLM_PROMPT_BS_BUCKET_MIN', int),
diff --git a/vllm_gaudi/extension/runtime.py b/vllm_gaudi/extension/runtime.py
index 629a1bcb1..1f267bdce 100644
--- a/vllm_gaudi/extension/runtime.py
+++ b/vllm_gaudi/extension/runtime.py
@@ -63,6 +63,7 @@ def finalize_config():
 
     user_flags = filter_defined(detected, USER_FLAGS)
     experimental_flags = filter_defined(detected, EXPERIMENTAL_FLAGS)
+    experimental_flags = [flag for flag in experimental_flags if flag not in user_flags]
     environment_values = filter_defined(detected, ENVIRONMENT_VALUES)
     feature_values = filter_defined(detected, FEATURE_VALUES)
 
@@ -77,6 +78,7 @@ def finalize_config():
             "From v0.12.0 release using those flags without VLLM_ENABLE_EXPERIMENTAL_FLAGS will trigger a fatal error.")
         logger().warning(footer)
 

+    dump('Environment', environment_values)
     dump('Features', feature_values)
     dump('User flags', user_flags)
diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index d93aae129..00bb49580 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -3743,11 +3743,14 @@ def warmup_defragmenter(self):
         logger.info("Defragmenter warmup completed successfully")
 
     def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_batch_seq=0.001):
+        from tqdm import tqdm
+
         total_mem = starting_mem
         idx = 0
         num_candidates = len(buckets)
         captured_all = True
-        for idx, (batch_size, seq_len, num_blocks) in enumerate(reversed(buckets)):
+        developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
+        for idx, (batch_size, seq_len, num_blocks) in tqdm(enumerate(reversed(buckets)), desc="Processing warmup"):
             if seq_len > self.max_num_tokens:
                 continue
             # Graph memory usage is proportional to seq dimension in a batch
@@ -3761,7 +3764,8 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
             if graphed_bucket in self.graphed_buckets:
                 continue
             self.graphed_buckets.add(graphed_bucket)
-            self.log_warmup(phase, idx, num_candidates, batch_size, seq_len, num_blocks)
+            if developer_settings:
+                self.log_warmup(phase, idx, num_candidates, batch_size, seq_len, num_blocks)
             prompt_cfg, decode_cfg = None, None
             with HabanaMemoryProfiler() as mem_prof:
                 if is_prompt:

From 6c25155c128a656ca7c0557d2b81faac35011d38 Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Thu, 6 Nov 2025 15:43:21 +0200
Subject: [PATCH 2/9] Update progress bar

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/v1/worker/hpu_model_runner.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index 00bb49580..95e052e7e 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -3597,7 +3597,7 @@ def log_warmup(self, phase, i, max_i, first_dim, second_dim, third_dim, causal=F
                f"query_len:{second_dim} "
                f"num_blocks:{third_dim} "
                f"free_mem:{free_mem}")
-        logger.info(msg)
+        tqdm.write(msg)
 
     def log_warmup_multimodal(self, phase, i, max_i, batch_size, seq_len, img_args):
         free_mem = format_bytes(HabanaMemoryProfiler.current_free_device_memory())
@@ -3750,7 +3750,9 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
         num_candidates = len(buckets)
         captured_all = True
         developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
-        for idx, (batch_size, seq_len, num_blocks) in tqdm(enumerate(reversed(buckets)), desc="Processing warmup"):
+        for idx, (batch_size, seq_len, num_blocks) in tqdm(enumerate(reversed(buckets)),
+                                                           desc="Processing warmup",
+                                                           unit="item"):
             if seq_len > self.max_num_tokens:
                 continue
             # Graph memory usage is proportional to seq dimension in a batch
@@ -3778,6 +3780,9 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
             total_mem += used_mem
             total_batch_seq += batch_seq
 
+            pbar.set_postfix_str(f"{idx}/{num_candidates}")
+            pbar.update(1)
+
         return total_mem, total_batch_seq, captured_all
 
     def warmup_unified_graphs(self, buckets, kv_cache):

From 3873e74deed48574a3c65aa77ecb34be177ac092 Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Thu, 6 Nov 2025 15:52:17 +0200
Subject: [PATCH 3/9] Fix

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/v1/worker/hpu_model_runner.py | 62 ++++++++++++------------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index 95e052e7e..d450df118 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -8,6 +8,7 @@
 import os
 import sys
 import time
+from tqdm import tqdm
 from dataclasses import dataclass, field, fields
 from typing import (TYPE_CHECKING, Any, Callable, Optional, TypeAlias,
                     Union, cast)
@@ -3743,45 +3744,42 @@ def warmup_defragmenter(self):
         logger.info("Defragmenter warmup completed successfully")
 
     def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_batch_seq=0.001):
-        from tqdm import tqdm
-
         total_mem = starting_mem
         idx = 0
         num_candidates = len(buckets)
         captured_all = True
         developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
-        for idx, (batch_size, seq_len, num_blocks) in tqdm(enumerate(reversed(buckets)),
-                                                           desc="Processing warmup",
-                                                           unit="item"):
-            if seq_len > self.max_num_tokens:
-                continue
-            # Graph memory usage is proportional to seq dimension in a batch
-            phase = f"Graph/{'prompt' if is_prompt else 'decode'}"
-            if is_prompt:
-                batch_seq = batch_size * seq_len * num_blocks if num_blocks else batch_size * seq_len
-            else:
-                batch_seq = batch_size
-
-            graphed_bucket = (batch_size, seq_len, num_blocks, is_prompt)
-            if graphed_bucket in self.graphed_buckets:
-                continue
-            self.graphed_buckets.add(graphed_bucket)
-            if developer_settings:
-                self.log_warmup(phase, idx, num_candidates, batch_size, seq_len, num_blocks)
-            prompt_cfg, decode_cfg = None, None
-            with HabanaMemoryProfiler() as mem_prof:
+        with tqdm(total=num_candidates, desc="Processing warmup", unit="item"):
+            for idx, (batch_size, seq_len, num_blocks) in enumerate(reversed(buckets)):
+                if seq_len > self.max_num_tokens:
+                    continue
+                # Graph memory usage is proportional to seq dimension in a batch
+                phase = f"Graph/{'prompt' if is_prompt else 'decode'}"
                 if is_prompt:
-                    prompt_cfg = (batch_size, seq_len, num_blocks)
+                    batch_seq = batch_size * seq_len * num_blocks if num_blocks else batch_size * seq_len
                 else:
-                    decode_cfg = (batch_size, 1, num_blocks)
-                self._prepare_dummy_scenario(prompt_cfg, decode_cfg)
-            # TODO(kzawora): align_workers
-            used_mem = mem_prof.consumed_device_memory
-            total_mem += used_mem
-            total_batch_seq += batch_seq
-
-            pbar.set_postfix_str(f"{idx}/{num_candidates}")
-            pbar.update(1)
+                    batch_seq = batch_size
+
+                graphed_bucket = (batch_size, seq_len, num_blocks, is_prompt)
+                if graphed_bucket in self.graphed_buckets:
+                    continue
+                self.graphed_buckets.add(graphed_bucket)
+                if developer_settings:
+                    self.log_warmup(phase, idx, num_candidates, batch_size, seq_len, num_blocks)
+                prompt_cfg, decode_cfg = None, None
+                with HabanaMemoryProfiler() as mem_prof:
+                    if is_prompt:
+                        prompt_cfg = (batch_size, seq_len, num_blocks)
+                    else:
+                        decode_cfg = (batch_size, 1, num_blocks)
+                    self._prepare_dummy_scenario(prompt_cfg, decode_cfg)
+                # TODO(kzawora): align_workers
+                used_mem = mem_prof.consumed_device_memory
+                total_mem += used_mem
+                total_batch_seq += batch_seq
+
+                pbar.set_postfix_str(f"{idx}/{num_candidates}")
+                pbar.update(1)
 
         return total_mem, total_batch_seq, captured_all

From 91b2bebe9af5db0b83644d6f0bba980d5e64a8d9 Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Thu, 6 Nov 2025 16:04:33 +0200
Subject: [PATCH 4/9] Fix and ua progress bar

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/v1/worker/hpu_model_runner.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index d450df118..770a88daf 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -3749,12 +3749,13 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
         num_candidates = len(buckets)
         captured_all = True
         developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
-        with tqdm(total=num_candidates, desc="Processing warmup", unit="item"):
+        phase = {'Prompt' if is_prompt else 'Decode'}
+        desc = phase + " warmup processing: "
+        with tqdm(total=num_candidates, desc=desc, unit="item") as pbar:
             for idx, (batch_size, seq_len, num_blocks) in enumerate(reversed(buckets)):
                 if seq_len > self.max_num_tokens:
                     continue
                 # Graph memory usage is proportional to seq dimension in a batch
-                phase = f"Graph/{'prompt' if is_prompt else 'decode'}"
                 if is_prompt:
                     batch_seq = batch_size * seq_len * num_blocks if num_blocks else batch_size * seq_len
                 else:
@@ -3786,13 +3787,16 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
     def warmup_unified_graphs(self, buckets, kv_cache):
         idx = 0
         num_candidates = len(buckets)
-        for idx, (query, shared_ctx, unique_ctx, is_causal) in enumerate(reversed(buckets)):
-            unified_cfg = (query, shared_ctx, unique_ctx, is_causal)
-            if unified_cfg in self.graphed_buckets:
-                continue
-            self.graphed_buckets.add(unified_cfg)
-            self.log_warmup("Unified CFG", idx, num_candidates, query, shared_ctx, unique_ctx, is_causal)
-            self._prepare_dummy_unified_scenario(unified_cfg)
+        with tqdm(total=num_candidates, desc="Unified Attention warmup", unit="item") as pbar:
+            for idx, (query, shared_ctx, unique_ctx, is_causal) in enumerate(reversed(buckets)):
+                unified_cfg = (query, shared_ctx, unique_ctx, is_causal)
+                if unified_cfg in self.graphed_buckets:
+                    continue
+                self.graphed_buckets.add(unified_cfg)
+                self.log_warmup("Unified CFG", idx, num_candidates, query, shared_ctx, unique_ctx, is_causal)
+                self._prepare_dummy_unified_scenario(unified_cfg)
+                pbar.set_postfix_str(f"{idx}/{num_candidates}")
+                pbar.update(1)
 
     def _add_dummy_request(self,
                            requests,

From 0dea976de1f7947e1f959d12448a0236a647e924 Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Thu, 6 Nov 2025 16:08:35 +0200
Subject: [PATCH 5/9] Upsie lol

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/v1/worker/hpu_model_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index 770a88daf..60cc801eb 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -3749,8 +3749,8 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
         num_candidates = len(buckets)
         captured_all = True
         developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
-        phase = {'Prompt' if is_prompt else 'Decode'}
-        desc = phase + " warmup processing: "
+        phase = 'Prompt' if is_prompt else 'Decode'
+        desc = f'{phase} warmup processing: '
         with tqdm(total=num_candidates, desc=desc, unit="item") as pbar:
             for idx, (batch_size, seq_len, num_blocks) in enumerate(reversed(buckets)):
                 if seq_len > self.max_num_tokens:

From 73c305e0b60f6d80d07d98993ecc1b1d14044ba0 Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Thu, 6 Nov 2025 16:37:53 +0200
Subject: [PATCH 6/9] Another one thank you

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/extension/runtime.py          | 2 +-
 vllm_gaudi/v1/worker/hpu_model_runner.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm_gaudi/extension/runtime.py b/vllm_gaudi/extension/runtime.py
index 1f267bdce..e60233c54 100644
--- a/vllm_gaudi/extension/runtime.py
+++ b/vllm_gaudi/extension/runtime.py
@@ -63,7 +63,7 @@ def finalize_config():
 
     user_flags = filter_defined(detected, USER_FLAGS)
     experimental_flags = filter_defined(detected, EXPERIMENTAL_FLAGS)
-    experimental_flags = [flag for flag in experimental_flags if flag not in user_flags]
+    experimental_flags = {k: v for k, v in experimental_flags.items() if k not in user_flags}
     environment_values = filter_defined(detected, ENVIRONMENT_VALUES)
     feature_values = filter_defined(detected, FEATURE_VALUES)
 
diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index 60cc801eb..e781a12b3 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -3787,13 +3787,15 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
     def warmup_unified_graphs(self, buckets, kv_cache):
         idx = 0
         num_candidates = len(buckets)
+        developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
         with tqdm(total=num_candidates, desc="Unified Attention warmup", unit="item") as pbar:
             for idx, (query, shared_ctx, unique_ctx, is_causal) in enumerate(reversed(buckets)):
                 unified_cfg = (query, shared_ctx, unique_ctx, is_causal)
                 if unified_cfg in self.graphed_buckets:
                     continue
                 self.graphed_buckets.add(unified_cfg)
-                self.log_warmup("Unified CFG", idx, num_candidates, query, shared_ctx, unique_ctx, is_causal)
+                if developer_settings:
+                    self.log_warmup("Unified CFG", idx, num_candidates, query, shared_ctx, unique_ctx, is_causal)
                 self._prepare_dummy_unified_scenario(unified_cfg)
                 pbar.set_postfix_str(f"{idx}/{num_candidates}")
                 pbar.update(1)

From f2e3541248cdf6dbcd3bd76716442ba24c48dc8d Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Thu, 6 Nov 2025 16:56:19 +0200
Subject: [PATCH 7/9] precommit

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/extension/runtime.py          | 1 -
 vllm_gaudi/v1/worker/hpu_model_runner.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm_gaudi/extension/runtime.py b/vllm_gaudi/extension/runtime.py
index e60233c54..7f56f1053 100644
--- a/vllm_gaudi/extension/runtime.py
+++ b/vllm_gaudi/extension/runtime.py
@@ -78,7 +78,6 @@ def finalize_config():
             "From v0.12.0 release using those flags without VLLM_ENABLE_EXPERIMENTAL_FLAGS will trigger a fatal error.")
         logger().warning(footer)
 
-
     dump('Environment', environment_values)
     dump('Features', feature_values)
diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index 45d26ed20..f0de8e9f3 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -3768,7 +3768,7 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
                     continue
                 self.graphed_buckets.add(graphed_bucket)
                 if developer_settings:
-                    self.log_warmup(phase, idx, num_candidates, batch_size, seq_len, num_blocks)
+                    self.log_warmup(phase, idx, num_candidates, batch_size, seq_len, num_blocks)
                 prompt_cfg, decode_cfg = None, None
                 with HabanaMemoryProfiler() as mem_prof:
                     if is_prompt:

From b3cf22ac6010c16a08b59e7adf6d04b83fdefb8b Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Fri, 14 Nov 2025 10:40:35 +0200
Subject: [PATCH 8/9] Change flag name

Signed-off-by: Agata Dobrzyniewicz
---
 vllm_gaudi/extension/features.py         | 2 +-
 vllm_gaudi/extension/runtime.py          | 4 ++--
 vllm_gaudi/v1/worker/hpu_model_runner.py | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vllm_gaudi/extension/features.py b/vllm_gaudi/extension/features.py
index fd6275524..d657fc279 100644
--- a/vllm_gaudi/extension/features.py
+++ b/vllm_gaudi/extension/features.py
@@ -12,7 +12,7 @@ def get_user_flags():
     flags = [
-        Env('VLLM_ENABLE_EXPERIMENTAL_FLAGS', boolean),
+        Env('VLLM_DEVELOPER_MODE', boolean),
         Env('VLLM_EXPONENTIAL_BUCKETING', boolean),
         Env('VLLM_PROMPT_BS_BUCKET_MIN', int),
         Env('VLLM_PROMPT_BS_BUCKET_STEP', int),
diff --git a/vllm_gaudi/extension/runtime.py b/vllm_gaudi/extension/runtime.py
index 7f56f1053..f800843b5 100644
--- a/vllm_gaudi/extension/runtime.py
+++ b/vllm_gaudi/extension/runtime.py
@@ -67,7 +67,7 @@ def finalize_config():
     environment_values = filter_defined(detected, ENVIRONMENT_VALUES)
     feature_values = filter_defined(detected, FEATURE_VALUES)
 
-    if len(experimental_flags) > 0 and not detected.VLLM_ENABLE_EXPERIMENTAL_FLAGS:
+    if len(experimental_flags) > 0 and not detected.VLLM_DEVELOPER_MODE:
         asterisks = 48 * '*'
         header = f"{asterisks} Warning! {asterisks}"
         footer = '*' * len(header)
@@ -75,7 +75,7 @@ def finalize_config():
         logger().warning(
             f"Following environment variables are considered experimental: {', '.join(experimental_flags)}")
         logger().warning(
-            "From v0.12.0 release using those flags without VLLM_ENABLE_EXPERIMENTAL_FLAGS will trigger a fatal error.")
+            "From v0.12.0 release using those flags without VLLM_DEVELOPER_MODE will trigger a fatal error.")
         logger().warning(footer)
 
     dump('Environment', environment_values)
diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index 11e5a6880..de51c390b 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -3761,7 +3761,7 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
         idx = 0
         num_candidates = len(buckets)
         captured_all = True
-        developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
+        developer_settings = get_config().VLLM_DEVELOPER_MODE
         phase = 'Prompt' if is_prompt else 'Decode'
         desc = f'{phase} warmup processing: '
         with tqdm(total=num_candidates, desc=desc, unit="item") as pbar:
@@ -3800,7 +3800,7 @@ def warmup_graphs(self, buckets, is_prompt, kv_caches, starting_mem=0, total_bat
     def warmup_unified_graphs(self, buckets, kv_cache):
         idx = 0
         num_candidates = len(buckets)
-        developer_settings = get_config().VLLM_ENABLE_EXPERIMENTAL_FLAGS
+        developer_settings = get_config().VLLM_DEVELOPER_MODE
         with tqdm(total=num_candidates, desc="Unified Attention warmup", unit="item") as pbar:
             for idx, (query, shared_ctx, unique_ctx, is_causal) in enumerate(reversed(buckets)):
                 unified_cfg = (query, shared_ctx, unique_ctx, is_causal)

From 06cae201b89485df05609ec5ff3b3856d5671d29 Mon Sep 17 00:00:00 2001
From: Agata Dobrzyniewicz
Date: Fri, 14 Nov 2025 10:43:44 +0200
Subject: [PATCH 9/9] readme

Signed-off-by: Agata Dobrzyniewicz
---
 docs/configuration/env_vars.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/configuration/env_vars.md b/docs/configuration/env_vars.md
index 204f93215..286043987 100644
--- a/docs/configuration/env_vars.md
+++ b/docs/configuration/env_vars.md
@@ -23,7 +23,9 @@ This document lists the supported diagnostic and profiling, as well as performan
 | `VLLM_EXPONENTIAL_BUCKETING` | Enables exponential bucket spacing instead of linear spacing. | `true` |
 | `VLLM_BUCKETING_FROM_FILE` | Enables reading bucket configuration from file | `None` |
 
-## Experimental Parameters
+## Developer Mode Parameters
+
+To enter developer mode, set `VLLM_DEVELOPER_MODE`:
 
 | Parameter name | Description | Default value |
 | ------------------ | ------------------------ | ------------- |
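For illustration, a minimal sketch of how the new flag could be exercised once this series is applied; the model name and the `LLM` entry point below are placeholders rather than part of the patches, and the flag itself is simply a boolean environment variable read through `get_user_flags()`:

```python
import os

# Assumption: VLLM_DEVELOPER_MODE has to be set before vLLM's Gaudi configuration
# is finalized, since get_user_flags() reads it from the environment at startup.
os.environ["VLLM_DEVELOPER_MODE"] = "true"

from vllm import LLM  # any vLLM entry point works; used here purely as an example

llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct")  # hypothetical model choice
```

With the flag set, the per-bucket `log_warmup` messages are emitted (via `tqdm.write`) alongside the warmup progress bars; without it, only the progress bars are shown.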