3838 "max_tokens" : 10 ,
3939}
4040
41- aisbench_cases = [{
41+ aisbench_gsm8k = [{
42+ "case_type" : "accuracy" ,
43+ "dataset_path" : "vllm-ascend/gsm8k-lite" ,
44+ "request_conf" : "vllm_api_general_chat" ,
45+ "dataset_conf" : "gsm8k/gsm8k_gen_0_shot_cot_chat_prompt" ,
46+ "max_out_len" : 32768 ,
47+ "batch_size" : 32 ,
48+ "baseline" : 95 ,
49+ "threshold" : 5
50+ }]
51+
52+ aisbench_aime = [{
4253 "case_type" : "accuracy" ,
4354 "dataset_path" : "vllm-ascend/aime2024" ,
4455 "request_conf" : "vllm_api_general_chat" ,
4556 "dataset_conf" : "aime2024/aime2024_gen_0_shot_chat_prompt" ,
4657 "max_out_len" : 32768 ,
4758 "batch_size" : 32 ,
48- "baseline" : 80 ,
59+ "baseline" : 86.67 ,
4960 "threshold" : 7
5061}]
5162
@@ -101,6 +112,7 @@ async def test_models(model: str, mode: str) -> None:
101112 json .dumps (speculative_config )])
102113 server_args .extend (["--gpu-memory-utilization" , "0.92" ])
103114 additional_config ["torchair_graph_config" ] = {"enabled" : True }
115+ aisbench_cases = aisbench_gsm8k
104116 if mode == "mtp3" :
105117 env_dict ["HCCL_OP_EXPANSION_MODE" ] = "AIV"
106118 server_args .extend (["--max-num-batched-tokens" , "2048" ])
@@ -113,6 +125,7 @@ async def test_models(model: str, mode: str) -> None:
113125 ["--compilation-config" ,
114126 json .dumps (compilation_config )])
115127 additional_config ["torchair_graph_config" ] = {"enabled" : False }
128+ aisbench_cases = aisbench_aime
116129 server_args .extend (["--additional-config" , json .dumps (additional_config )])
117130 request_keyword_args : dict [str , Any ] = {
118131 ** api_keyword_args ,
0 commit comments