From 4fbc7e625fc68e445e624ef487fae56753b3c5f5 Mon Sep 17 00:00:00 2001 From: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com> Date: Wed, 26 Nov 2025 09:36:57 +0000 Subject: [PATCH] Move some of the tests from b200 to gb200 to avoid load model issue Signed-off-by: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com> --- jenkins/L0_Test.groovy | 9 ++++++--- tests/integration/defs/accuracy/test_llm_api_pytorch.py | 4 +--- .../test_lists/test-db/l0_gb200_multi_nodes.yml | 6 ++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index e9d94a39738..ec31fee1b34 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -2783,7 +2783,8 @@ def launchTestJobs(pipeline, testFilter) "DGX_B200-4_GPUs-PyTorch-1": ["b200-x4", "l0_dgx_b200", 1, 2, 4], "DGX_B200-4_GPUs-PyTorch-2": ["b200-x4", "l0_dgx_b200", 2, 2, 4], "DGX_B200-4_GPUs-PyTorch-Ray-1": ["b200-x4", "l0_dgx_b200", 1, 1, 4], - "DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8], + // Move DGX_B200-8_GPUs-PyTorch-1 due to nodes load model issue. + // "DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8], "DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-trtllm", "l0_dgx_b200", 1, 1, 4, 1, true], "DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4], // Perf sanity post merge test @@ -2825,8 +2826,10 @@ def launchTestJobs(pipeline, testFilter) multiNodesSBSAConfigs = [ // Each testcase uses 8 GPUs and 2 nodes. // https://nvbugs/5598863 (uncorrectable NVLink error detected during the execution) may not exist in OCI machines. 
- "GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 2, 8, 2], - "GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 2, 8, 2], + "GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2], + "GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2], + // Request 1 more node for tests that were moved from B200 to GB200 + "GB200-8_GPUs-2_Nodes-PyTorch-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2], "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2], "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2], "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2], diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index 75d636f130c..669768d75f2 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -2426,7 +2426,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size, task.evaluate(llm) @skip_pre_blackwell - @pytest.mark.skip_less_device(8) + @pytest.mark.skip_less_mpi_world_size(8) def test_nvfp4_multi_gpus_corner_case(self): """ This test is used to test the corner case of the NVFP4 model. 
@@ -2575,7 +2575,6 @@ class TestDeepSeekV32(LlmapiAccuracyTestHarness): MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3.2-Exp-hf" @pytest.mark.skip_less_mpi_world_size(8) - @pytest.mark.skip_less_device(8) @skip_pre_hopper @pytest.mark.skip_less_device_memory(140000) @pytest.mark.parametrize( @@ -2650,7 +2649,6 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv, task.evaluate(llm) @pytest.mark.skip_less_mpi_world_size(8) - @pytest.mark.skip_less_device(8) @skip_pre_blackwell @pytest.mark.parametrize( "tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size,moe_backend", diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml index 57c3b6fd810..ee7a09804e3 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml @@ -15,9 +15,15 @@ l0_gb200_multi_nodes: tests: - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput] TIMEOUT (180) - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_mtp] TIMEOUT (180) + - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_bs8_mtp] TIMEOUT (180) # Temporarily added to GB200 to avoid load model issue - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] TIMEOUT (180) - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] TIMEOUT (180) - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp_trtllm] TIMEOUT (180) + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline] TIMEOUT (180) # Temporarily added to GB200 to avoid load model issue + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline_mtp1] TIMEOUT (180) # Temporarily added to GB200 to avoid load model issue + - 
accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline] TIMEOUT (180) # Temporarily added to GB200 to avoid load model issue + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline_mtp1] TIMEOUT (180) # Temporarily added to GB200 to avoid load model issue + - accuracy/test_disaggregated_serving.py::TestDeepSeekV32Exp::test_auto_dtype[False] TIMEOUT (360) # Temporarily added to GB200 to avoid load model issue - condition: ranges: # 2 nodes with each node has 4 GPUs