ikawrakow
diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
Lines changed: 10 additions & 5 deletions
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
Lines changed: 0 additions & 1 deletion
@@ -236,6 +236,11 @@ typedef struct {
     int8_t qs[4*QK8_0];
 } block_q8_0_x4;
 static_assert(sizeof(block_q8_0_x4) == 4*sizeof(block_q8_0), "wrong q8_0_x4 block size/padding");
+typedef struct {
+    ggml_half d[8];        // one ggml_half per packed q8_0 block (8 total); presumably the per-block deltas, as in the neighbouring x4 structs -- confirm
+    int8_t qs[8*QK8_0];    // 8*QK8_0 int8 quants; how the 8 blocks are interleaved is not visible in this hunk
+} block_q8_0_r8;
+static_assert(sizeof(block_q8_0_r8) == 8*sizeof(block_q8_0), "wrong q8_0_r8 block size/padding"); // must occupy exactly the space of 8 block_q8_0, i.e. no padding
 
 typedef struct {
     ggml_half d[4];        // deltas for 4 q4_0 blocks
@@ -534,12 +539,12 @@ typedef struct {
 static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
 
 typedef struct {
-    ggml_half d[4];
-    uint8_t scales_h[QK_K/32];
-    uint8_t scales_l[QK_K/16];
-    uint8_t qs[QK_K*2];
+    ggml_half d[8];                // 8 entries (was 4), in line with the 8*sizeof(block_iq4_xs) size check below
+    uint8_t scales_h[QK_K/16];     // doubled from QK_K/32
+    uint8_t scales_l[QK_K/ 8];     // doubled from QK_K/16
+    uint8_t qs[QK_K*4];            // doubled from QK_K*2
 } block_iq4_xs_r4;
-static_assert(sizeof(block_iq4_xs_r4) == 4*sizeof(ggml_half) + QK_K/32 + QK_K/16 + QK_K*2, "wrong iq4_xs_rs block size/padding");
+static_assert(sizeof(block_iq4_xs_r4) == 8*sizeof(block_iq4_xs), "wrong iq4_xs_rs block size/padding"); // NOTE(review): the type keeps its _r4 name but is now sized as 8 block_iq4_xs, and the message says "iq4_xs_rs" -- confirm both are intended
 
 typedef struct {
     uint8_t  scales[QK_K/32];
 
@@ -936,7 +936,6 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k)
 
 #if defined(__ARM_NEON)
     for (int i = 0; i < nb; i++) {
-        int i4 = i/4, ir = i%4;
         float32x4_t srcv [8];
         float32x4_t asrcv[8];
         float32x4_t amaxv[8];