diff --git a/tests/integration/defs/examples/serve/test_configs/DeepSeek-R1-FP4.yml b/tests/integration/defs/examples/serve/test_configs/DeepSeek-R1-FP4.yml new file mode 100644 index 00000000000..7f7bcf3d50f --- /dev/null +++ b/tests/integration/defs/examples/serve/test_configs/DeepSeek-R1-FP4.yml @@ -0,0 +1,18 @@ +enable_iter_perf_stats: true +print_iter_log: false +cuda_graph_config: + max_batch_size: 16 + enable_padding: false +moe_config: + backend: TRTLLM + max_num_tokens: 32768 +speculative_config: + decoding_type: MTP + num_nextn_predict_layers: 3 +disable_overlap_scheduler: true +enable_autotuner: true +kv_cache_config: + free_gpu_memory_fraction: 0.6 + enable_block_reuse: true + enable_partial_reuse: false +enable_chunked_prefill: true diff --git a/tests/integration/defs/examples/serve/test_serve.py b/tests/integration/defs/examples/serve/test_serve.py index c861d525a26..b218bf3ca81 100755 --- a/tests/integration/defs/examples/serve/test_serve.py +++ b/tests/integration/defs/examples/serve/test_serve.py @@ -1,8 +1,10 @@ import os import time +import pytest import requests -from defs.conftest import llm_models_root, skip_no_hopper +from defs.conftest import (llm_models_root, skip_no_hopper, + skip_post_blackwell_ultra, skip_pre_hopper) from defs.trt_test_alternative import popen, print_error, print_info from openai import OpenAI from requests.exceptions import RequestException @@ -130,3 +132,56 @@ def test_extra_llm_api_options(serve_test_root): model_name = model_path.split('/')[-1] # "Qwen3-30B-A3B-FP8" # Test the server with OpenAI chat completion check_openai_chat_completion(model_name=model_name) + + +@skip_pre_hopper +@skip_post_blackwell_ultra +@pytest.mark.skip_less_device(8) +def test_extra_llm_api_options_for_deepseek_r1_fp4(serve_test_root): + test_configs_root = f"{serve_test_root}/test_configs" + + # moe backend = TRTLLM, as configured in DeepSeek-R1-FP4.yml + config_file = f"{test_configs_root}/DeepSeek-R1-FP4.yml" + model_path = 
f"{llm_models_root()}/DeepSeek-R1/DeepSeek-R1-FP4" + + # Assert that required files and directories exist + assert os.path.exists( + test_configs_root + ), f"test_configs_root directory does not exist: {test_configs_root}" + assert os.path.exists( + config_file), f"config_file does not exist: {config_file}" + assert os.path.exists( + model_path), f"model_path does not exist: {model_path}" + + cmd = [ + "trtllm-serve", + model_path, + "--host", + "0.0.0.0", + "--port", + "8000", + "--backend", + "pytorch", + "--max_batch_size", + "32", + "--max_num_tokens", + "32768", + "--max_seq_len", + "163840", + "--tp_size", + "8", + "--ep_size", + "1", + "--extra_llm_api_options", + config_file, + "--log_level", + "info", + ] + + print_info("Launching trtllm-serve...") + with popen(cmd): + check_server_ready(timeout_timer=3600) + # Extract model name from the model path for consistency + model_name = model_path.split('/')[-1] # "DeepSeek-R1-FP4" + # Test the server with OpenAI chat completion + check_openai_chat_completion(model_name=model_name) diff --git a/tests/integration/defs/perf/disagg/compare_backends.py b/tests/integration/defs/perf/disagg/compare_backends.py index c1a9ed541be..ff564989746 100644 --- a/tests/integration/defs/perf/disagg/compare_backends.py +++ b/tests/integration/defs/perf/disagg/compare_backends.py @@ -2,6 +2,7 @@ """Compare performance test results between different backends (UCX vs NIXL).""" import argparse +import os import re import sys @@ -45,6 +46,10 @@ def compare_backends(csv_path, threshold=5.0, default_backend="NIXL"): DataFrame: Comparison results """ # Read CSV file + if not os.path.exists(csv_path): + print(f"CSV file not found: {csv_path}") + sys.exit(0) + df = pd.read_csv(csv_path) if len(df) == 0: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml 
b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index ba44ed4c103..17f9ad8a467 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 0 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json accuracy: datasets: - dataset_name: gsm8k diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml index 489b4aeacf5..0b7f9b5116c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 8 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml index 5a25ecfc4ac..77c6d9efe3e 100644 --- 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 11 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 28c55ce399c..5a521d153c9 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 10 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index e2a9f705885..c18dc647335 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 13 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml index 5cf614ba631..5867f7cec04 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 9 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml index 872e5c7a1c5..b6323430a38 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: 
disaggr_torch.slurm benchmark_type: 1k1k config_index: 12 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index c6879f3cbb7..2edcde24191 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 1 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml index 2f254163c5f..e12202ce057 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 3 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: 
script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 01362f78534..4df17554c2b 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 0 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml index 17ffdbd15b8..ef6be466b2b 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 1k1k config_index: 2 - dataset_file: datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-1024-1024-100000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml index 52012de6e20..36de0c4aadb 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 14 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 216c6f8899c..c30872e04ef 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 5 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml index 104e567525e..59d082c1b0d 100644 --- 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 7 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 9aa8e38d158..55eb4f5a0ab 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 4 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index d60df72d594..f3210441c80 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -8,7 +8,7 @@ metadata: script_file: disaggr_torch.slurm benchmark_type: 8k1k config_index: 6 - dataset_file: datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json + dataset_file: disagg_datasets/deepseek-r1-8192-1024-200000-ratio-1_for_serve.json slurm: script_file: disaggr_torch.slurm partition: diff --git a/tests/integration/test_lists/qa/llm_function_core.txt b/tests/integration/test_lists/qa/llm_function_core.txt index 69c24f0f5ee..cb739b1e8f4 100644 --- a/tests/integration/test_lists/qa/llm_function_core.txt +++ b/tests/integration/test_lists/qa/llm_function_core.txt @@ -755,6 +755,7 @@ examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-small-128k-instruct-eagle2] examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3.5-mini-instruct-eagle2] examples/serve/test_serve.py::test_extra_llm_api_options +examples/serve/test_serve.py::test_extra_llm_api_options_for_deepseek_r1_fp4 TIMEOUT (120) examples/serve/test_serve_negative.py::test_invalid_max_tokens examples/serve/test_serve_negative.py::test_invalid_temperature examples/serve/test_serve_negative.py::test_invalid_top_p[-0.1]