File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -77,7 +77,7 @@ def __init__(
7777 mul_mat_q : bool = True ,
7878 logits_all : bool = False ,
7979 embedding : bool = False ,
80- offload_kqv : bool = False ,
80+ offload_kqv : bool = True ,
8181 # Sampling Params
8282 last_n_tokens_size : int = 64 ,
8383 # LoRA Params
Original file line number Diff line number Diff line change @@ -90,7 +90,7 @@ class ModelSettings(BaseSettings):
9090 logits_all : bool = Field (default = True , description = "Whether to return logits." )
9191 embedding : bool = Field (default = True , description = "Whether to use embeddings." )
9292 offload_kqv : bool = Field (
93- default = False , description = "Whether to offload kqv to the GPU."
93+ default = True , description = "Whether to offload kqv to the GPU."
9494 )
9595 # Sampling Params
9696 last_n_tokens_size : int = Field (
You can’t perform that action at this time.
0 commit comments