4242 "case_type" : "accuracy" ,
4343 "dataset_path" : "vllm-ascend/gsm8k-lite" ,
4444 "request_conf" : "vllm_api_general_chat" ,
45- "dataset_conf" : "gsm8k/gsm8k_gen_0_shot_noncot_chat_prompt " ,
45+ "dataset_conf" : "gsm8k/gsm8k_gen_0_shot_cot_chat_prompt " ,
4646 "max_out_len" : 32768 ,
4747 "batch_size" : 32 ,
4848 "baseline" : 95 ,
6060 "threshold" : 7
6161}]
6262
63- aisbench_case_dict = {"mtp2" : aisbench_gsm8k , "mtp3" : aisbench_aime }
64-
6563
6664@pytest .mark .asyncio
6765@pytest .mark .parametrize ("model" , MODELS )
@@ -114,6 +112,7 @@ async def test_models(model: str, mode: str) -> None:
         json.dumps(speculative_config)])
     server_args.extend(["--gpu-memory-utilization", "0.92"])
     additional_config["torchair_graph_config"] = {"enabled": True}
+    aisbench_cases = aisbench_gsm8k
     if mode == "mtp3":
         env_dict["HCCL_OP_EXPANSION_MODE"] = "AIV"
         server_args.extend(["--max-num-batched-tokens", "2048"])
@@ -126,6 +125,7 @@ async def test_models(model: str, mode: str) -> None:
126125 ["--compilation-config" ,
127126 json .dumps (compilation_config )])
128127 additional_config ["torchair_graph_config" ] = {"enabled" : False }
128+ aisbench_cases = aisbench_aime
129129 server_args .extend (["--additional-config" , json .dumps (additional_config )])
130130 request_keyword_args : dict [str , Any ] = {
131131 ** api_keyword_args ,
@@ -145,7 +145,6 @@ async def test_models(model: str, mode: str) -> None:
     assert choices[0].text, "empty response"
     print(choices)
     # aisbench test
-    aisbench_cases = aisbench_case_dict[mode]
     run_aisbench_cases(model,
                        port,
                        aisbench_cases,
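
Net effect of this diff, for reference: the removed aisbench_case_dict[mode] lookup is replaced by a default assignment (the GSM8K cases) that the mtp3 branch overrides (the AIME cases), so case selection sits next to the rest of each mode's setup. Below is a minimal sketch of that selection logic, assuming placeholder case lists; only the aisbench_gsm8k/aisbench_aime names and the "mtp3" mode string come from the diff, everything else is illustrative:

    # Placeholder case lists; the real ones (defined earlier in the test
    # file) carry dataset_path/request_conf/baseline/threshold fields.
    aisbench_gsm8k = [{"case_type": "accuracy"}]
    aisbench_aime = [{"case_type": "accuracy"}]

    def select_aisbench_cases(mode: str) -> list[dict]:
        # Mirrors the diff: GSM8K is the default case set, and the mtp3
        # branch overrides it, replacing the old aisbench_case_dict[mode]
        # lookup that ran just before run_aisbench_cases().
        aisbench_cases = aisbench_gsm8k
        if mode == "mtp3":
            aisbench_cases = aisbench_aime
        return aisbench_cases

    assert select_aisbench_cases("mtp2") is aisbench_gsm8k
    assert select_aisbench_cases("mtp3") is aisbench_aime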