@@ -165,12 +165,16 @@ class llama_token_data_array(Structure):
 # int32_t n_gpu_layers; // number of layers to store in VRAM
 # int32_t main_gpu; // the GPU that is used for scratch and small tensors
 # float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+
+# // ref: https://github.com/ggerganov/llama.cpp/pull/2054
+# float rope_freq_base; // RoPE base frequency
+# float rope_freq_scale; // RoPE frequency scaling factor
+
 # // called with a progress value between 0 and 1, pass NULL to disable
 # llama_progress_callback progress_callback;
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;
 
-
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache
@@ -188,6 +192,8 @@ class llama_context_params(Structure):
         ("n_gpu_layers", c_int32),
         ("main_gpu", c_int32),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("rope_freq_base", c_float),
+        ("rope_freq_scale", c_float),
         ("progress_callback", llama_progress_callback),
         ("progress_callback_user_data", c_void_p),
         ("low_vram", c_bool),
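
The two new fields can be set directly on the ctypes struct before a context is created. The snippet below is a minimal, illustrative sketch only: it assumes the binding's existing llama_context_default_params() helper (defined elsewhere in this file) is used to obtain defaults, and the scaling values are arbitrary examples rather than recommended settings.

    import llama_cpp

    # Illustrative sketch: get defaults from libllama, then override the
    # RoPE fields added in this commit (example values, not defaults).
    params = llama_cpp.llama_context_default_params()
    params.rope_freq_base = 10000.0  # RoPE base frequency
    params.rope_freq_scale = 0.5     # compress frequencies, e.g. to stretch the usable context
    print(params.rope_freq_base, params.rope_freq_scale)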