Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
enable_iter_perf_stats: true
print_iter_log: false
cuda_graph_config:
max_batch_size: 16
enable_padding: false
moe_config:
backend: TRTLLM
max_num_tokens: 32768
speculative_config:
decoding_type: MTP
num_nextn_predict_layers: 3
disable_overlap_scheduler: true
enable_autotuner: true
kv_cache_config:
free_gpu_memory_fraction: 0.6
enable_block_reuse: true
enable_partial_reuse: false
enable_chunked_prefill: true
57 changes: 56 additions & 1 deletion tests/integration/defs/examples/serve/test_serve.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
import time

import pytest
import requests
from defs.conftest import llm_models_root, skip_no_hopper
from defs.conftest import (llm_models_root, skip_no_hopper,
skip_post_blackwell_ultra, skip_pre_hopper)
from defs.trt_test_alternative import popen, print_error, print_info
from openai import OpenAI
from requests.exceptions import RequestException
Expand Down Expand Up @@ -130,3 +132,56 @@ def test_extra_llm_api_options(serve_test_root):
model_name = model_path.split('/')[-1] # "Qwen3-30B-A3B-FP8"
# Test the server with OpenAI chat completion
check_openai_chat_completion(model_name=model_name)


@skip_pre_hopper
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FP4 is not supported on Hopper; should this use skip_pre_blackwell instead?

@skip_post_blackwell_ultra
@pytest.mark.skip_less_device(8)
def test_extra_llm_api_options_for_deepseek_r1_fp4(serve_test_root):
test_configs_root = f"{serve_test_root}/test_configs"

# MoE backend and the other LLM API options are taken from the YAML config file below
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this comment is kind of misleading

config_file = f"{test_configs_root}/DeepSeek-R1-FP4.yml"
model_path = f"{llm_models_root()}/DeepSeek-R1/DeepSeek-R1-FP4"

# Assert that required files and directories exist
assert os.path.exists(
test_configs_root
), f"test_configs_root directory does not exist: {test_configs_root}"
assert os.path.exists(
config_file), f"config_file does not exist: {config_file}"
assert os.path.exists(
model_path), f"model_path does not exist: {model_path}"

cmd = [
"trtllm-serve",
model_path,
"--host",
"0.0.0.0",
"--port",
"8000",
"--backend",
"pytorch",
"--max_batch_size",
"32",
"--max_num_tokens",
"32768",
"--max_seq_len",
"163840",
"--tp_size",
"8",
"--ep_size",
"1",
"--extra_llm_api_options",
config_file,
"--log_level",
"info",
]

print_info("Launching trtllm-serve...")
with popen(cmd):
check_server_ready(timeout_timer=3600)
# Extract model name from the model path for consistency
model_name = model_path.split('/')[-1] # "DeepSeek-R1-FP4"
# Test the server with OpenAI chat completion
check_openai_chat_completion(model_name=model_name)
5 changes: 5 additions & 0 deletions tests/integration/defs/perf/disagg/compare_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""Compare performance test results between different backends (UCX vs NIXL)."""

import argparse
import os
import re
import sys

Expand Down Expand Up @@ -45,6 +46,10 @@ def compare_backends(csv_path, threshold=5.0, default_backend="NIXL"):
DataFrame: Comparison results
"""
# Read CSV file
if not os.path.exists(csv_path):
print(f"CSV file not found: {csv_path}")
sys.exit(0)

df = pd.read_csv(csv_path)

if len(df) == 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 0
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
accuracy:
datasets:
- dataset_name: gsm8k
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 8
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 11
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 10
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 13
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 9
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 12
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 1
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 3
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 0
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 1k1k
config_index: 2
dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 8k1k
config_index: 14
dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 8k1k
config_index: 5
dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 8k1k
config_index: 7
dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 8k1k
config_index: 4
dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
script_file: disaggr_torch.slurm
benchmark_type: 8k1k
config_index: 6
dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json
slurm:
script_file: disaggr_torch.slurm
partition: <partition>
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_lists/qa/llm_function_core.txt
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,7 @@ examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-small-128k-instruct-eagle2]
examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3.5-mini-instruct-eagle2]

examples/serve/test_serve.py::test_extra_llm_api_options
examples/serve/test_serve.py::test_extra_llm_api_options_for_deepseek_r1_fp4 TIMEOUT (120)
examples/serve/test_serve_negative.py::test_invalid_max_tokens
examples/serve/test_serve_negative.py::test_invalid_temperature
examples/serve/test_serve_negative.py::test_invalid_top_p[-0.1]
Expand Down