Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions jenkins/L0_Test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2783,7 +2783,8 @@ def launchTestJobs(pipeline, testFilter)
"DGX_B200-4_GPUs-PyTorch-1": ["b200-x4", "l0_dgx_b200", 1, 2, 4],
"DGX_B200-4_GPUs-PyTorch-2": ["b200-x4", "l0_dgx_b200", 2, 2, 4],
"DGX_B200-4_GPUs-PyTorch-Ray-1": ["b200-x4", "l0_dgx_b200", 1, 1, 4],
"DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8],
// Moved DGX_B200-8_GPUs-PyTorch-1 due to a node model-loading issue.
// "DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8],
"DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-trtllm", "l0_dgx_b200", 1, 1, 4, 1, true],
"DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4],
// Perf sanity post merge test
Expand Down Expand Up @@ -2825,8 +2826,10 @@ def launchTestJobs(pipeline, testFilter)
multiNodesSBSAConfigs = [
// Each testcase uses 8 GPUs and 2 nodes.
// https://nvbugs/5598863 (uncorrectable NVLink error detected during the execution) may not exist in OCI machines.
"GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 2, 8, 2],
"GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 2, 8, 2],
"GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2],
"GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2],
// Request 1 more node for tests that were moved from B200 to GB200
"GB200-8_GPUs-2_Nodes-PyTorch-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2],
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2],
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2],
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2],
Expand Down
4 changes: 1 addition & 3 deletions tests/integration/defs/accuracy/test_llm_api_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2426,7 +2426,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size,
task.evaluate(llm)

@skip_pre_blackwell
@pytest.mark.skip_less_device(8)
@pytest.mark.skip_less_mpi_world_size(8)
def test_nvfp4_multi_gpus_corner_case(self):
"""
This test is used to test the corner case of the NVFP4 model.
Expand Down Expand Up @@ -2575,7 +2575,6 @@ class TestDeepSeekV32(LlmapiAccuracyTestHarness):
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3.2-Exp-hf"

@pytest.mark.skip_less_mpi_world_size(8)
@pytest.mark.skip_less_device(8)
@skip_pre_hopper
@pytest.mark.skip_less_device_memory(140000)
@pytest.mark.parametrize(
Expand Down Expand Up @@ -2650,7 +2649,6 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
task.evaluate(llm)

@pytest.mark.skip_less_mpi_world_size(8)
@pytest.mark.skip_less_device(8)
@skip_pre_blackwell
@pytest.mark.parametrize(
"tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size,moe_backend",
Expand Down
6 changes: 6 additions & 0 deletions tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ l0_gb200_multi_nodes:
tests:
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput] TIMEOUT (180)
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_mtp] TIMEOUT (180)
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_bs8_mtp] TIMEOUT (180) # Temporarily moved to GB200 to avoid the model-loading issue
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] TIMEOUT (180)
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] TIMEOUT (180)
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp_trtllm] TIMEOUT (180)
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline] TIMEOUT (180) # Temporarily moved to GB200 to avoid the model-loading issue
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline_mtp1] TIMEOUT (180) # Temporarily moved to GB200 to avoid the model-loading issue
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline] TIMEOUT (180) # Temporarily moved to GB200 to avoid the model-loading issue
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline_mtp1] TIMEOUT (180) # Temporarily moved to GB200 to avoid the model-loading issue
- accuracy/test_disaggregated_serving.py::TestDeepSeekV32Exp::test_auto_dtype[False] TIMEOUT (360) # Temporarily moved to GB200 to avoid the model-loading issue
- condition:
ranges:
# 2 nodes with each node has 4 GPUs
Expand Down