@@ -212,13 +212,16 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
 _lib.llama_get_kv_cache_token_count.argtypes = [llama_context_p]
 _lib.llama_get_kv_cache_token_count.restype = c_int
 
+
 # Sets the current rng seed.
 def llama_set_rng_seed(ctx: llama_context_p, seed: c_int):
     return _lib.llama_set_rng_seed(ctx, seed)
 
+
 _lib.llama_set_rng_seed.argtypes = [llama_context_p, c_int]
 _lib.llama_set_rng_seed.restype = None
 
+
 # Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)
@@ -249,6 +252,44 @@ def llama_set_state_data(ctx: llama_context_p, src) -> c_size_t:
 _lib.llama_set_state_data.restype = c_size_t
 
 
+# Save/load session file
+def llama_load_session_file(
+    ctx: llama_context_p,
+    path_session: bytes,
+    tokens_out,
+    n_token_capacity: c_size_t,
+    n_token_count_out,
+) -> c_size_t:
+    return _lib.llama_load_session_file(
+        ctx, path_session, tokens_out, n_token_capacity, n_token_count_out
+    )
+
+
+_lib.llama_load_session_file.argtypes = [
+    llama_context_p,
+    c_char_p,
+    llama_token_p,
+    c_size_t,
+    POINTER(c_size_t),
+]
+_lib.llama_load_session_file.restype = c_size_t
+
+
+def llama_save_session_file(
+    ctx: llama_context_p, path_session: bytes, tokens, n_token_count: c_size_t
+) -> c_size_t:
+    return _lib.llama_save_session_file(ctx, path_session, tokens, n_token_count)
+
+
+_lib.llama_save_session_file.argtypes = [
+    llama_context_p,
+    c_char_p,
+    llama_token_p,
+    c_size_t,
+]
+_lib.llama_save_session_file.restype = c_size_t
+
+
 # Run the llama inference to obtain the logits and probabilities for the next token.
 # tokens + n_tokens is the provided batch of new tokens to process
 # n_past is the number of tokens to use from previous eval calls
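
For context, a minimal sketch of how the new session-file bindings could be driven from Python once this change lands. It assumes the other bindings already exposed by this module (llama_context_default_params, llama_init_from_file, llama_n_ctx, llama_token); the model path, session filename, and buffer sizing below are placeholders, not part of this commit.

# Sketch only: allocate a token buffer sized to the context window, try to
# restore a previous session into it, and save the session again afterwards.
from ctypes import c_size_t, byref

import llama_cpp

params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model.bin", params)  # placeholder path

# Buffer that llama_load_session_file fills with the tokens stored in the session.
n_ctx = llama_cpp.llama_n_ctx(ctx)
tokens = (llama_cpp.llama_token * int(n_ctx))()
n_token_count = c_size_t(0)

# Restore a prior session if one was saved; n_token_count receives the number
# of tokens actually written into the buffer.
llama_cpp.llama_load_session_file(
    ctx, b"session.bin", tokens, c_size_t(int(n_ctx)), byref(n_token_count)
)

# ... evaluate new tokens here, appending them to `tokens` and bumping
# n_token_count so the saved session reflects the full history ...

# Persist the state (rng, logits, embedding, kv_cache) plus the token history.
llama_cpp.llama_save_session_file(ctx, b"session.bin", tokens, n_token_count)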