
Commit 1f39185

Enlarge DeepSeek R1 timeout

Try to use B200 to load this one to speed it up a little, and add a test case for RCCA 5527655.

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
1 parent 5425d96 commit 1f39185

File tree (3 files changed: +75, -2 lines):
  test_configs/DeepSeek-R1-FP4.yml (new)
  tests/integration/defs/examples/serve/test_serve.py
  tests/integration/test_lists/qa/llm_function_core.txt
test_configs/DeepSeek-R1-FP4.yml (new file)

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+enable_iter_perf_stats: true
+print_iter_log: false
+cuda_graph_config:
+  max_batch_size: 16
+  enable_padding: false
+moe_config:
+  backend: TRTLLM
+  max_num_tokens: 32768
+speculative_config:
+  decoding_type: MTP
+  num_nextn_predict_layers: 3
+disable_overlap_scheduler: true
+enable_autotuner: true
+kv_cache_config:
+  free_gpu_memory_fraction: 0.6
+  enable_block_reuse: true
+  enable_partial_reuse: false
+enable_chunked_prefill: true

tests/integration/defs/examples/serve/test_serve.py

Lines changed: 56 additions & 2 deletions
@@ -1,8 +1,9 @@
 import os
 import time
-
+import pytest
 import requests
-from defs.conftest import llm_models_root, skip_no_hopper
+from defs.conftest import llm_models_root, skip_no_hopper, \
+    skip_pre_hopper, skip_post_blackwell_ultra
 from defs.trt_test_alternative import popen, print_error, print_info
 from openai import OpenAI
 from requests.exceptions import RequestException
@@ -130,3 +131,56 @@ def test_extra_llm_api_options(serve_test_root):
     model_name = model_path.split('/')[-1]  # "Qwen3-30B-A3B-FP8"
     # Test the server with OpenAI chat completion
     check_openai_chat_completion(model_name=model_name)
+
+
+@skip_pre_hopper
+@skip_post_blackwell_ultra
+@pytest.mark.skip_less_device(8)
+def test_extra_llm_api_options_for_deepseek_r1_fp4(serve_test_root):
+    test_configs_root = f"{serve_test_root}/test_configs"
+
+    # The CUTLASS MoE backend only supports FP8 blockscale on Hopper
+    config_file = f"{test_configs_root}/DeepSeek-R1-FP4.yml"
+    model_path = f"{llm_models_root()}/DeepSeek-R1/DeepSeek-R1-FP4"
+
+    # Assert that required files and directories exist
+    assert os.path.exists(
+        test_configs_root
+    ), f"test_configs_root directory does not exist: {test_configs_root}"
+    assert os.path.exists(
+        config_file), f"config_file does not exist: {config_file}"
+    assert os.path.exists(
+        model_path), f"model_path does not exist: {model_path}"
+
+    cmd = [
+        "trtllm-serve",
+        model_path,
+        "--host",
+        "0.0.0.0",
+        "--port",
+        "8000",
+        "--backend",
+        "pytorch",
+        "--max_batch_size",
+        "32",
+        "--max_num_tokens",
+        "32768",
+        "--max_seq_len",
+        "163840",
+        "--tp_size",
+        "8",
+        "--ep_size",
+        "1",
+        "--extra_llm_api_options",
+        config_file,
+        "--log_level",
+        "info",
+    ]
+
+    print_info("Launching trtllm-serve...")
+    with popen(cmd):
+        check_server_ready(timeout_timer=3600)
+        # Extract model name from the model path for consistency
+        model_name = model_path.split('/')[-1]  # "DeepSeek-R1-FP4"
+        # Test the server with OpenAI chat completion
+        check_openai_chat_completion(model_name=model_name)
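The new test reuses two helpers defined earlier in test_serve.py, check_server_ready and check_openai_chat_completion, whose bodies are not part of this diff. A hedged sketch of the semantics the test relies on, assuming trtllm-serve exposes a /health endpoint and an OpenAI-compatible API at /v1 on port 8000 (both assumptions, not confirmed by this diff):

import time

import requests
from openai import OpenAI


def check_server_ready_sketch(timeout_timer=3600,
                              url="http://localhost:8000/health"):
    # Poll until the server answers 200 OK or the timeout expires;
    # timeout_timer=3600 mirrors the value passed in the test above.
    deadline = time.time() + timeout_timer
    while time.time() < deadline:
        try:
            if requests.get(url, timeout=5).status_code == 200:
                return
        except requests.exceptions.RequestException:
            pass  # server still starting; keep polling
        time.sleep(10)
    raise TimeoutError(f"server not ready within {timeout_timer}s")


def check_openai_chat_completion_sketch(model_name):
    # One round-trip through the OpenAI-compatible chat endpoint.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
    resp = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "Say hello."}],
        max_tokens=16,
    )
    assert resp.choices[0].message.content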

tests/integration/test_lists/qa/llm_function_core.txt

Lines changed: 1 addition & 0 deletions
@@ -755,6 +755,7 @@ examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-small-128k-instruct-eagle2]
 examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3.5-mini-instruct-eagle2]
 
 examples/serve/test_serve.py::test_extra_llm_api_options
+examples/serve/test_serve.py::test_extra_llm_api_options_for_deepseek_r1_fp4 TIMEOUT (120)
 examples/serve/test_serve_negative.py::test_invalid_max_tokens
 examples/serve/test_serve_negative.py::test_invalid_temperature
 examples/serve/test_serve_negative.py::test_invalid_top_p[-0.1]
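The trailing TIMEOUT (120) is the enlarged per-test timeout from this commit, interpreted by the QA test harness rather than by pytest itself. For a local run, the case can be selected directly; a minimal sketch, assuming pytest is launched from the repository root:

import pytest

# Hypothetical local invocation of only the new integration test; the
# harness-level TIMEOUT (120) from the QA list is not applied here.
exit_code = pytest.main([
    "tests/integration/defs/examples/serve/test_serve.py"
    "::test_extra_llm_api_options_for_deepseek_r1_fp4",
])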
