2 files changed (+7, −3)

CHANGELOG.md

@@ -12,7 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - (build-system) Migrate from scikit-build to scikit-build-core
 
 ### Fixed
+
 - Truncate max_tokens in create_completion so requested tokens doesn't exceed context size.
+- Temporarily disable cache for completion requests
 
 ## [v0.1.59]
 
llama_cpp/llama.py

@@ -831,7 +831,9 @@ def _create_completion(
                 "logprobs is not supported for models created with logits_all=False"
             )
 
-        if self.cache:
+        # Temporarily disable usage of the cache
+        # See: https://github.com/abetlen/llama-cpp-python/issues/348#issuecomment-1583072408
+        if self.cache and False:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
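For readers skimming the disabled branch above: the cache lookup compares the current prompt's tokens with a previously cached prompt and reuses as much of the saved state as the shared token prefix allows. A minimal sketch of that prefix comparison follows; `longest_token_prefix` is named in the diff, but the implementation and the sample token ids here are illustrative assumptions, not the library's code.

```python
# Illustrative sketch of longest-token-prefix matching (assumption, not the
# library's implementation): counts how many leading token ids two prompts share.
from typing import Sequence


def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    """Return the length of the common leading run of token ids in a and b."""
    n = 0
    for x, y in zip(a, b):
        if x != y:
            break
        n += 1
    return n


cached_prompt = [1, 15043, 29892, 920]  # tokens of a previously cached prompt (made up)
new_prompt = [1, 15043, 29892, 825]     # current prompt; first three tokens match

print(longest_token_prefix(cached_prompt, new_prompt))  # -> 3
```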
@@ -1069,14 +1071,14 @@ def _create_completion(
                     }
                 ],
             }
-            if self.cache:
+            if self.cache and False:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
                 self.cache[prompt_tokens + completion_tokens] = self.save_state()
                 print("Llama._create_completion: cache saved", file=sys.stderr)
             return
 
-        if self.cache:
+        if self.cache and False:
             if self.verbose:
                 print("Llama._create_completion: cache save", file=sys.stderr)
             self.cache[prompt_tokens + completion_tokens] = self.save_state()
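Taken together, the three hunks leave the cache code in place but make every guard unreachable: Python's `and` short-circuits, and a literal `False` on the right forces the whole condition to be falsy, so neither the cache lookup nor the cache save runs. The sketch below reproduces that pattern with a plain dict standing in for `self.cache`; the dict and the `ENABLE_CACHE` flag are assumptions for illustration only, not the library's API.

```python
# Illustrative sketch (not the library's code) of the "and False" disable pattern.
import sys

cache: dict = {(1, 2, 3): "old-saved-state"}  # non-empty stand-in for self.cache (assumption)
prompt_tokens = [1, 2, 3]
completion_tokens = [4, 5]

# Hard-coded disable, as in the diff: even though the cache is truthy, the
# right-hand False makes the condition falsy and the branch body never runs.
if cache and False:
    cache[tuple(prompt_tokens + completion_tokens)] = "saved-state"
    print("Llama._create_completion: cache save", file=sys.stderr)

# Reverting later only means dropping "and False"; a named flag (hypothetical)
# would make the same switch explicit and grep-able.
ENABLE_CACHE = False
if cache and ENABLE_CACHE:
    cache[tuple(prompt_tokens + completion_tokens)] = "saved-state"
    print("Llama._create_completion: cache save", file=sys.stderr)

print(f"cached entries: {len(cache)}", file=sys.stderr)  # -> 1: nothing new was saved
```

The revert stays cheap: removing the two added comment lines and the `and False` suffixes restores the original caching behavior.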