File tree Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Original file line number Diff line number Diff line change @@ -239,6 +239,7 @@ def __init__(
239239 n_ctx: Maximum context size.
240240 n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
241241 seed: Random seed. -1 for random.
242+ n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
242243 f16_kv: Use half-precision for key/value cache.
243244 logits_all: Return logits for all tokens, not just the last token.
244245 vocab_only: Only load the vocabulary, no weights.
@@ -267,7 +268,7 @@ def __init__(
267268
268269 self.params = llama_cpp.llama_context_default_params()
269270 self.params.n_ctx = n_ctx
270- self.params.n_gpu_layers = n_gpu_layers
271+ self.params.n_gpu_layers = 0x7FFFFFFF if n_gpu_layers == -1 else n_gpu_layers # 0x7FFFFFFF is INT32 max, will be auto set to all layers
271272 self.params.seed = seed
272273 self.params.f16_kv = f16_kv
273274 self.params.logits_all = logits_all
You can’t perform that action at this time.
0 commit comments