File tree Expand file tree Collapse file tree 4 files changed +18
-18
lines changed Original file line number Diff line number Diff line change 11cadence : weekly
2- model : Qwen/Qwen2.5-VL-7B-Instruct
3- model_class : Qwen2_5_VLForConditionalGeneration
2+ model : Qwen/Qwen3-VL-8B-Instruct
3+ model_class : Qwen3VLForConditionalGeneration
44scheme : FP8_DYNAMIC
55recipe : tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml
66lmeval :
77 model : "hf-multimodal"
88 model_args :
99 dtype : bfloat16
10- add_bos_token : True
1110 convert_img_format : True
1211 task : mmmu_val_literature
12+ apply_chat_template : True
1313 num_fewshot : 0
1414 batch_size : 8
15- # dense model achieves accuracy of 0.9 +/ 0.0557
15+ # dense model achieves accuracy of 0.833
1616 metrics :
17- acc,none : 0.8333
18- acc_stderr,none : 0.0557
17+ acc,none : 0.833
Original file line number Diff line number Diff line change 11cadence : "weekly"
2- model : Qwen/Qwen2.5-VL-7B-Instruct
3- model_class : Qwen2_5_VLForConditionalGeneration
2+ model : Qwen/Qwen3-VL-8B-Instruct
3+ model_class : Qwen3VLForConditionalGeneration
44scheme : INT8_dyn_per_token
55recipe : tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml
66dataset_id : lmms-lab/flickr30k
@@ -9,12 +9,11 @@ lmeval:
99 model : "hf-multimodal"
1010 model_args :
1111 dtype : bfloat16
12- add_bos_token : True
1312 convert_img_format : True
1413 task : mmmu_val_literature
14+ apply_chat_template : True
1515 num_fewshot : 0
1616 batch_size : 8
17- # dense model achieves accuracy of 0.9 +/ 0.0557
17+ # dense model achieves accuracy of 0.833
1818 metrics :
19- acc,none : 0.833
20- acc_stderr,none : 0.0557
19+ acc,none : 0.833
Original file line number Diff line number Diff line change 11cadence : "weekly"
2- model : Qwen/Qwen2.5-VL-7B-Instruct
3- model_class : Qwen2_5_VLForConditionalGeneration
2+ model : Qwen/Qwen3-VL-8B-Instruct
3+ model_class : Qwen3VLForConditionalGeneration
44scheme : W4A16_actorder_weight
55recipe : tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml
66dataset_id : lmms-lab/flickr30k
@@ -9,12 +9,11 @@ lmeval:
99 model : "hf-multimodal"
1010 model_args :
1111 dtype : bfloat16
12- add_bos_token : True
1312 convert_img_format : True
1413 task : mmmu_val_literature
14+ apply_chat_template : True
1515 num_fewshot : 0
1616 batch_size : 8
17- # dense model achieves accuracy of 0.9 +/ 0.0557
17+ # dense model achieves accuracy of 0.8333
1818 metrics :
19- acc,none : 0.8333
20- acc_stderr,none : 0.0557
19+ acc,none : 0.800
Original file line number Diff line number Diff line change @@ -25,6 +25,7 @@ class LmEvalConfig(BaseModel):
2525 num_fewshot : int = 5
2626 limit : int = 1000
2727 batch_size : int = 100
28+ apply_chat_template : bool = False
2829 # Recovery testing (default): compare against base model performance
2930 # Default threshold is 0.95 (retain ≥95% of base), can be overridden
3031 recovery_threshold : Union[float, dict] = 0.95
@@ -160,6 +161,7 @@ def _eval_base_model(self):
160161 num_fewshot = self.lmeval.num_fewshot,
161162 limit = self.lmeval.limit,
162163 device = "cuda:0",
164+ apply_chat_template = self.lmeval.apply_chat_template,
163165 batch_size = self.lmeval.batch_size,
164166 )
165167
@@ -190,6 +192,7 @@ def _run_lm_eval(self):
190192 num_fewshot = self.lmeval.num_fewshot,
191193 limit = self.lmeval.limit,
192194 device = "cuda:0",
195+ apply_chat_template = self.lmeval.apply_chat_template,
193196 batch_size = self.lmeval.batch_size,
194197 )
195198
You can’t perform that action at this time.
0 commit comments