
Commit 96b3995

fix gptq observer call
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent: a0b83b4


1 file changed: +5 -4 lines


src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py

Lines changed: 5 additions & 4 deletions
@@ -121,22 +121,23 @@ def quantize_weight(
         if actorder == ActivationOrdering.GROUP:
             # permute by activation order first, then update groups
             W, H, perm = _apply_activation_ordering(W, H)
-            scale, zero_point = observer(W, g_idx=None)
+            module.weight_g_idx = g_idx
+            scale, zero_point = observer(W)
 
             # use identity g_idx (invert permutation later)
 
         elif actorder == ActivationOrdering.WEIGHT:
             # update groups first, then permute by activation order
-            scale, zero_point = observer(W, g_idx=None)
+            scale, zero_point = observer(W)
             W, H, perm = _apply_activation_ordering(W, H)
 
             # permute g_idx to maintain identity mapping after unpermutation
             g_idx = g_idx[perm]
 
         else:
-            scale, zero_point = observer(W, g_idx=None)
+            scale, zero_point = observer(W)
     else:
-        scale, zero_point = observer(W, g_idx=None)
+        scale, zero_point = observer(W)
 
     # sparsity mask
     sparsity = tensor_sparsity(W)
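Note on the fix: the observer is now called as observer(W) with no g_idx keyword, and the GROUP activation-ordering branch records the group index tensor on the module before observing. Below is a minimal, self-contained sketch of that call pattern; the toy min-max observer and the stand-in for _apply_activation_ordering (assumed here to sort columns by descending Hessian diagonal, the usual GPTQ activation ordering) are hypothetical illustrations, not the llm-compressor API.

# Hypothetical sketch only -- toy stand-ins, not the llm-compressor Observer API.
import torch


def toy_minmax_observer(W: torch.Tensor, group_size: int = 128):
    """Per-group symmetric scales/zero-points over weight columns (toy int4)."""
    num_groups = (W.shape[1] + group_size - 1) // group_size
    scales, zero_points = [], []
    for g in range(num_groups):
        group = W[:, g * group_size : (g + 1) * group_size]
        max_abs = group.abs().amax(dim=1, keepdim=True).clamp(min=1e-8)
        scales.append(max_abs / 7.0)  # map the per-group max magnitude onto the int4 positive range
        zero_points.append(torch.zeros_like(max_abs, dtype=torch.int32))
    return torch.cat(scales, dim=1), torch.cat(zero_points, dim=1)


def apply_activation_ordering(W: torch.Tensor, H: torch.Tensor):
    """Permute weight columns (and the Hessian) by descending Hessian diagonal."""
    perm = torch.argsort(torch.diag(H), descending=True)
    return W[:, perm], H[perm][:, perm], perm


# GROUP-style activation ordering as in the diff: permute first, then observe the
# permuted weight; g_idx keeps its identity mapping and is stored separately.
W = torch.randn(256, 512)                    # weight: [out_features, in_features]
X = torch.randn(1024, 512)                   # calibration activations
H = X.T @ X / X.shape[0]                     # proxy for the GPTQ Hessian
g_idx = torch.arange(512, dtype=torch.int) // 128

W_perm, H_perm, perm = apply_activation_ordering(W, H)
scale, zero_point = toy_minmax_observer(W_perm)   # called as observer(W), no g_idx kwarg

The WEIGHT branch in the diff reverses this order: it observes the unpermuted weight first, then permutes W, H, and g_idx together, which is exactly the difference between the two branches shown above.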
