Skip to content

Commit 6ff1118

Browse files
brian-dellabetta authored and HDCharles committed
neuralmagic/calibration dataset
Signed-off-by: Brian Dellabetta <bdellabe@redhat.com>
1 parent 3396166 commit 6ff1118

File tree

5 files changed

+61
-16
lines changed

5 files changed

+61
-16
lines changed

tests/e2e/e2e_utils.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,21 @@ def data_collator(batch):
6262

6363
oneshot_kwargs["data_collator"] = data_collator
6464

65+
elif "calibration" in dataset_id:
66+
67+
def data_collator(batch):
68+
assert len(batch) == 1
69+
return {
70+
key: (
71+
torch.tensor(value)
72+
if key != "pixel_values"
73+
else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
74+
)
75+
for key, value in batch[0].items()
76+
}
77+
78+
oneshot_kwargs["data_collator"] = data_collator
79+
6580
oneshot_kwargs["model"] = loaded_model
6681
if recipe:
6782
oneshot_kwargs["recipe"] = recipe

tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@ lmeval:
1313
num_fewshot: 0
1414
batch_size: 100
1515
limit: 100
16-
# test runs in 26m
1716
# dense model achieves exact_match accuracy of 0.530
1817
# dense model achieves relaxed_accuracy of 0.780
1918
# dense model achieves anywhere_accuracy of 0.800

tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@ model: Qwen/Qwen3-VL-8B-Instruct
33
model_class: Qwen3VLForConditionalGeneration
44
scheme: INT8_dyn_per_token
55
recipe: tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml
6-
dataset_id: lmms-lab/flickr30k
7-
dataset_split: "test[:512]"
6+
dataset_id: neuralmagic/calibration
7+
dataset_config: LLM
8+
dataset_split: "train[:512]"
89
lmeval:
910
model: "hf-multimodal"
1011
model_args:
@@ -15,11 +16,10 @@ lmeval:
1516
num_fewshot: 0
1617
batch_size: 100
1718
limit: 100
18-
# test runs in m
19-
# dense model achieves exact_match accuracy of 0.
20-
# dense model achieves relaxed_accuracy of 0.
21-
# dense model achieves anywhere_accuracy of 0.
19+
# dense model achieves exact_match accuracy of 0.520
20+
# dense model achieves relaxed_accuracy of 0.780
21+
# dense model achieves anywhere_accuracy of 0.800
2222
metrics:
23-
exact_match,none: 0.
24-
relaxed_accuracy,none: 0.
25-
anywhere_accuracy,none: 0.
23+
exact_match,none: 0.550
24+
relaxed_accuracy,none: 0.770
25+
anywhere_accuracy,none: 0.770

tests/lmeval/configs/vl_w4a16_actorder_weight.yaml

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,23 @@ model: Qwen/Qwen3-VL-8B-Instruct
33
model_class: Qwen3VLForConditionalGeneration
44
scheme: W4A16_actorder_weight
55
recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml
6-
dataset_id: lmms-lab/flickr30k
7-
dataset_split: "test[:512]"
6+
dataset_id: neuralmagic/calibration
7+
dataset_config: LLM
8+
dataset_split: "train[:512]"
89
lmeval:
910
model: "hf-multimodal"
1011
model_args:
1112
dtype: bfloat16
1213
convert_img_format: True
13-
task: mmmu_val_literature
14+
task: chartqa
1415
apply_chat_template: True
1516
num_fewshot: 0
16-
batch_size: 8
17-
# dense model achieves accuracy of 0.8333
17+
batch_size: 100
18+
limit: 100
19+
# dense model achieves exact_match accuracy of 0.520
20+
# dense model achieves relaxed_accuracy of 0.780
21+
# dense model achieves anywhere_accuracy of 0.800
1822
metrics:
19-
acc,none: 0.800
23+
exact_match,none: 0.540
24+
relaxed_accuracy,none: 0.780
25+
anywhere_accuracy,none: 0.800

tests/testing_utils.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,31 @@ def process(sample):
285285
"images": sample["image"],
286286
}
287287

288+
# "neuralmagic/calibration"
289+
elif ds_name == "calibration":
290+
291+
def process(example):
292+
messages = []
293+
for message in example["messages"]:
294+
messages.append(
295+
{
296+
"role": message["role"],
297+
"content": [{"type": "text", "text": message["content"]}],
298+
}
299+
)
300+
301+
return processor.apply_chat_template(
302+
messages,
303+
return_tensors="pt",
304+
padding=False,
305+
truncation=True,
306+
max_length=max_seq_length,
307+
tokenize=True,
308+
add_special_tokens=False,
309+
return_dict=True,
310+
add_generation_prompt=False,
311+
)
312+
288313
else:
289314
raise NotImplementedError(f"Cannot preprocess dataset {ds.info.dataset_name}")
290315

0 commit comments

Comments
 (0)