Update llama.cpp

abetlen · abetlen · commit 186626d58e93 · 2023-09-01T14:26:13.000-04:00
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
@@ -294,13 +294,15 @@ class llama_context_params(Structure):
 #     enum llama_ftype ftype;      // quantize to this llama_ftype
 #     bool allow_requantize;       // allow quantizing non-f32/f16 tensors
 #     bool quantize_output_tensor; // quantize output.weight
+#     bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 # } llama_model_quantize_params;
 class llama_model_quantize_params(Structure):
     _fields_ = [
         ("nthread", c_int),
         ("ftype", c_int),
         ("allow_requantize", c_bool),
         ("quantize_output_tensor", c_bool),
+        ("only_copy", c_bool),
     ]
 
 
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit bcce96ba4dd95482824700c4ce2455fe8c49055a
+Subproject commit 69fdbb9abc8907dd2a9ffdd840cba92d678a660a