vllm-project
diff --git a/examples/quantization_w4a4_fp4/qwen_30b_a3b.py b/examples/quantization_w4a4_fp4/qwen_30b_a3b.py
Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ def tokenize(sample):
 #   * calibrate a global_scale for activations, which will be used to
 #       quantize activations to fp4 on the fly
 recipe = QuantizationModifier(
-    targets="Linear", scheme="NVFP4", ignore=["lm_head", "re:.*mlp.gate$"]
+    targets="Linear", scheme="W4A16", ignore=["lm_head", "re:.*mlp.gate$"]
 )
 
 # Apply quantization.
Original file line number	Diff line number	Diff line change
`@@ -55,7 +55,7 @@ def tokenize(sample):`
`55`	`55`	`# * calibrate a global_scale for activations, which will be used to`
`56`	`56`	`# quantize activations to fp4 on the fly`
`57`	`57`	`recipe = QuantizationModifier(`
`58`		`- targets="Linear", scheme="NVFP4", ignore=["lm_head", "re:.*mlp.gate$"]`
	`58`	`+ targets="Linear", scheme="W4A16", ignore=["lm_head", "re:.*mlp.gate$"]`
`59`	`59`	`)`
`60`	`60`
`61`	`61`	`# Apply quantization.`