Skip to content

Commit dd9b92c

Browse files
committed
Move some of the tests from b200 to gb200 to avoid the model-loading issue
Signed-off-by: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com>
1 parent 8104a78 commit dd9b92c

File tree

3 files changed

+13
-6
lines changed

3 files changed

+13
-6
lines changed

jenkins/L0_Test.groovy

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2749,7 +2749,8 @@ def launchTestJobs(pipeline, testFilter)
27492749
"DGX_B200-4_GPUs-PyTorch-1": ["b200-x4", "l0_dgx_b200", 1, 2, 4],
27502750
"DGX_B200-4_GPUs-PyTorch-2": ["b200-x4", "l0_dgx_b200", 2, 2, 4],
27512751
"DGX_B200-4_GPUs-PyTorch-Ray-1": ["b200-x4", "l0_dgx_b200", 1, 1, 4],
2752-
"DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8],
2752+
// Move DGX_B200-8_GPUs-PyTorch-1 due to a model-loading issue on these nodes.
2753+
// "DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8],
27532754
"DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-trtllm", "l0_dgx_b200", 1, 1, 4, 1, true],
27542755
"DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4],
27552756
// Perf sanity post merge test
@@ -2791,8 +2792,10 @@ def launchTestJobs(pipeline, testFilter)
27912792
multiNodesSBSAConfigs = [
27922793
// Each testcase uses 8 GPUs and 2 nodes.
27932794
// https://nvbugs/5598863 (uncorrectable NVLink error detected during the execution) may not exist in OCI machines.
2794-
"GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 2, 8, 2],
2795-
"GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 2, 8, 2],
2795+
"GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2],
2796+
"GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2],
2797+
// Request 1 more node for the tests moved from B200 to GB200.
2798+
"GB200-8_GPUs-2_Nodes-PyTorch-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2],
27962799
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2],
27972800
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2],
27982801
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2],

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2250,7 +2250,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size,
22502250
task.evaluate(llm)
22512251

22522252
@skip_pre_blackwell
2253-
@pytest.mark.skip_less_device(8)
2253+
@pytest.mark.skip_less_mpi_world_size(8)
22542254
def test_nvfp4_multi_gpus_corner_case(self):
22552255
"""
22562256
This test is used to test the corner case of the NVFP4 model.
@@ -2399,7 +2399,6 @@ class TestDeepSeekV32(LlmapiAccuracyTestHarness):
23992399
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3.2-Exp-hf"
24002400

24012401
@pytest.mark.skip_less_mpi_world_size(8)
2402-
@pytest.mark.skip_less_device(8)
24032402
@skip_pre_hopper
24042403
@pytest.mark.skip_less_device_memory(140000)
24052404
@pytest.mark.parametrize(
@@ -2470,7 +2469,6 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
24702469
task.evaluate(llm)
24712470

24722471
@pytest.mark.skip_less_mpi_world_size(8)
2473-
@pytest.mark.skip_less_device(8)
24742472
@skip_pre_blackwell
24752473
@pytest.mark.parametrize(
24762474
"tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size,moe_backend",

tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,15 @@ l0_gb200_multi_nodes:
1515
tests:
1616
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput] TIMEOUT (180)
1717
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_mtp] TIMEOUT (180)
18+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_bs8_mtp] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
1819
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] TIMEOUT (180)
1920
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] TIMEOUT (180)
2021
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp_trtllm] TIMEOUT (180)
22+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
23+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline_mtp1] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
24+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
25+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline_mtp1] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
26+
- accuracy/test_disaggregated_serving.py::TestDeepSeekV32Exp::test_auto_dtype[False] TIMEOUT (360) # Temporarily added to GB200 to avoid the model-loading issue
2127
- condition:
2228
ranges:
2329
# 2 nodes with each node has 4 GPUs

0 commit comments

Comments
 (0)