
Commit 96b3995

fix gptq observer call
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent: a0b83b4


1 file changed: +5 -4 lines


src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py

Lines changed: 5 additions & 4 deletions
@@ -121,22 +121,23 @@ def quantize_weight(
         if actorder == ActivationOrdering.GROUP:
             # permute by activation order first, then update groups
             W, H, perm = _apply_activation_ordering(W, H)
-            scale, zero_point = observer(W, g_idx=None)
+            module.weight_g_idx = g_idx
+            scale, zero_point = observer(W)
 
             # use identity g_idx (invert permutation later)
 
         elif actorder == ActivationOrdering.WEIGHT:
             # update groups first, then permute by activation order
-            scale, zero_point = observer(W, g_idx=None)
+            scale, zero_point = observer(W)
             W, H, perm = _apply_activation_ordering(W, H)
 
             # permute g_idx to maintain identity mapping after unpermutation
             g_idx = g_idx[perm]
 
         else:
-            scale, zero_point = observer(W, g_idx=None)
+            scale, zero_point = observer(W)
     else:
-        scale, zero_point = observer(W, g_idx=None)
+        scale, zero_point = observer(W)
 
     # sparsity mask
     sparsity = tensor_sparsity(W)
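Note on the fix: the observer is now called as observer(W) with no g_idx keyword, and the GROUP activation-ordering branch records the group index tensor on the module before observing. Below is a minimal, self-contained sketch of that call pattern; the toy min-max observer and the stand-in for _apply_activation_ordering (assumed here to sort columns by descending Hessian diagonal, the usual GPTQ activation ordering) are hypothetical illustrations, not the llm-compressor API.

# Hypothetical sketch only -- toy stand-ins, not the llm-compressor Observer API.
import torch


def toy_minmax_observer(W: torch.Tensor, group_size: int = 128):
    """Per-group symmetric scales/zero-points over weight columns (toy int4)."""
    num_groups = (W.shape[1] + group_size - 1) // group_size
    scales, zero_points = [], []
    for g in range(num_groups):
        group = W[:, g * group_size : (g + 1) * group_size]
        max_abs = group.abs().amax(dim=1, keepdim=True).clamp(min=1e-8)
        scales.append(max_abs / 7.0)  # map the per-group max magnitude onto the int4 positive range
        zero_points.append(torch.zeros_like(max_abs, dtype=torch.int32))
    return torch.cat(scales, dim=1), torch.cat(zero_points, dim=1)


def apply_activation_ordering(W: torch.Tensor, H: torch.Tensor):
    """Permute weight columns (and the Hessian) by descending Hessian diagonal."""
    perm = torch.argsort(torch.diag(H), descending=True)
    return W[:, perm], H[perm][:, perm], perm


# GROUP-style activation ordering as in the diff: permute first, then observe the
# permuted weight; g_idx keeps its identity mapping and is stored separately.
W = torch.randn(256, 512)                    # weight: [out_features, in_features]
X = torch.randn(1024, 512)                   # calibration activations
H = X.T @ X / X.shape[0]                     # proxy for the GPTQ Hessian
g_idx = torch.arange(512, dtype=torch.int) // 128

W_perm, H_perm, perm = apply_activation_ordering(W, H)
scale, zero_point = toy_minmax_observer(W_perm)   # called as observer(W), no g_idx kwarg

The WEIGHT branch in the diff reverses this order: it observes the unpermuted weight first, then permutes W, H, and g_idx together, which is exactly the difference between the two branches shown above.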
