@@ -219,6 +219,7 @@ def __init__(
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        low_vram: bool = False,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -260,6 +261,7 @@ def __init__(
         self.params.use_mmap = use_mmap if lora_path is None else False
         self.params.use_mlock = use_mlock
         self.params.embedding = embedding
+        self.params.low_vram = low_vram

         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
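
For context, a minimal usage sketch of the new flag, assuming the public `Llama` constructor from llama-cpp-python; the model path is a placeholder, not part of this change:

```python
from llama_cpp import Llama

# Sketch: enable the new low_vram flag at construction time.
# The model path is hypothetical.
llm = Llama(
    model_path="./models/7B/ggml-model.bin",
    low_vram=True,  # forwarded to llama.cpp's context params to reduce VRAM use
)
```
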
@@ -1447,6 +1449,7 @@ def __getstate__(self):
             use_mmap=self.params.use_mmap,
             use_mlock=self.params.use_mlock,
             embedding=self.params.embedding,
+            low_vram=self.params.low_vram,
             last_n_tokens_size=self.last_n_tokens_size,
             n_batch=self.n_batch,
             n_threads=self.n_threads,
@@ -1470,6 +1473,7 @@ def __setstate__(self, state):
             use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
             embedding=state["embedding"],
+            low_vram=state["low_vram"],
             n_threads=state["n_threads"],
             n_batch=state["n_batch"],
             last_n_tokens_size=state["last_n_tokens_size"],
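
The `__getstate__`/`__setstate__` hunks mean the flag also survives pickling. A sketch, assuming the `llm` instance from the example above (note that unpickling reloads the model from `model_path`):

```python
import pickle

# Round-trip the model through pickle; low_vram is now part of the state dict.
restored = pickle.loads(pickle.dumps(llm))
assert restored.params.low_vram == llm.params.low_vram
```
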