diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 23830c4e0781..967b4f0b6513 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -1224,7 +1224,7 @@ steps: - pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm - pytest -v -s tests/distributed/test_context_parallel.py - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048 - - pytest -v -s tests/v1/distributed/test_dbo.py + - pytest -v -s tests/v1/distributed/test_dbo.py ##### B200 test ##### - label: Distributed Tests (B200) # optional diff --git a/tests/evals/gsm8k/configs/Qwen1.5-MoE-W4A16-CT.yaml b/tests/evals/gsm8k/configs/Qwen1.5-MoE-W4A16-CT.yaml index c5dbceeeb2b4..ea9c95158405 100644 --- a/tests/evals/gsm8k/configs/Qwen1.5-MoE-W4A16-CT.yaml +++ b/tests/evals/gsm8k/configs/Qwen1.5-MoE-W4A16-CT.yaml @@ -2,4 +2,7 @@ model_name: "nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16" accuracy_threshold: 0.45 num_questions: 1319 num_fewshot: 5 -max_model_len: 4096 \ No newline at end of file +max_model_len: 4096 +# Duo stream incompatible with this model: https://github.com/vllm-project/vllm/issues/28220 +env: +  VLLM_DISABLE_SHARED_EXPERTS_STREAM: "1" diff --git a/tests/evals/gsm8k/test_gsm8k_correctness.py b/tests/evals/gsm8k/test_gsm8k_correctness.py index ce3ab8096b45..b5d67df7bf3d 100644 --- a/tests/evals/gsm8k/test_gsm8k_correctness.py +++ b/tests/evals/gsm8k/test_gsm8k_correctness.py @@ -62,9 +62,11 @@ def test_gsm8k_correctness_param(config_filename, tp_size): str(tp_size), ] + env_dict = eval_config.get("env", None) + # Launch server and run evaluation with RemoteOpenAIServer( - eval_config["model_name"], server_args, max_wait_seconds=480 + eval_config["model_name"], server_args, env_dict=env_dict, max_wait_seconds=480 ) as 
remote_server: server_url = remote_server.url_for("v1")