@@ -24,16 +24,7 @@ semantic_cache:
   # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
   # Default: "bert" (fastest, lowest memory)
   embedding_model: "bert"
-  # HNSW index configuration (for memory backend only)
-  use_hnsw: true  # Enable HNSW index for faster similarity search
-  hnsw_m: 16  # Number of bi-directional links (higher = better recall, more memory)
-  hnsw_ef_construction: 200  # Construction parameter (higher = better quality, slower build)
 
-  # Hybrid cache configuration (when backend_type: "hybrid")
-  # Combines in-memory HNSW for fast search with Milvus for scalable storage
-  # max_memory_entries: 100000  # Max entries in HNSW index (default: 100,000)
-  # backend_config_path: "config/milvus.yaml"  # Path to Milvus config
-
 tools:
   enabled: true
   top_k: 3
@@ -223,7 +214,7 @@ router:
   traditional_attention_dropout_prob: 0.1  # Traditional model attention dropout probability
   tie_break_confidence: 0.5  # Confidence value for tie-breaking situations
 
-default_model: openai/gpt-oss-20b
+default_model: qwen3
 
 # Reasoning family configurations
 reasoning_families:
@@ -246,7 +237,7 @@ reasoning_families:
 default_reasoning_effort: high
 
 # Enable OpenAI Responses API adapter (experimental)
-enable_responses_adapter: false
+enable_responses_adapter: true
 
 # API Configuration
 api:
0 commit comments