README.md: 32 changes (31 additions, 1 deletion)
@@ -120,6 +120,36 @@ MCP_TRANSPORT=streamable-http btmcp-server # Runs on http://127.0.0.1:8000
- `MCP_HOST` - Host for HTTP mode (default: `127.0.0.1`)
- `MCP_PORT` - Port for HTTP mode (default: `8000`)
- `BTMCP_CACHE_DIR` - Custom cache directory (default: `<project>/.cache/`)
- `BTMCP_ENABLE_SEMANTIC` - Enable semantic search (`true` or `false`, default: `true`)
- `BTMCP_MODEL_NAME` - Sentence-transformers model name (default: `all-MiniLM-L6-v2`)

### Customizing the Semantic Model

You can configure which sentence-transformers model to use for semantic search:

```bash
# Use a larger, more accurate model (requires more memory and disk space)
BTMCP_MODEL_NAME="all-mpnet-base-v2" btmcp-server

# Use a mid-size model (better accuracy than the default, at moderate speed)
BTMCP_MODEL_NAME="all-MiniLM-L12-v2" btmcp-server

# Disable semantic search entirely (BM25-only mode)
BTMCP_ENABLE_SEMANTIC=false btmcp-server
```
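
These variables compose with the transport settings above, so a single command line can configure both. A sketch (the port value here is an arbitrary example):

```bash
# HTTP transport on a custom port with a higher-accuracy model
MCP_TRANSPORT=streamable-http MCP_PORT=9000 BTMCP_MODEL_NAME="all-mpnet-base-v2" btmcp-server
```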

**Popular model options:**

| Model | Embedding Dimensions | Model Size | Speed | Accuracy |
|-------|----------------|------------|-------|----------|
| `all-MiniLM-L6-v2` (default) | 384 | ~80MB | Fast | Good |
| `all-MiniLM-L12-v2` | 384 | ~120MB | Medium | Better |
| `all-mpnet-base-v2` | 768 | ~420MB | Slower | Best |
| `multi-qa-MiniLM-L6-cos-v1` | 384 | ~80MB | Fast | Good (Q&A optimized) |

**Note:** When you change the model, the cache will be invalidated and PDFs will be re-indexed with the new model's embeddings.

For a complete list of available models, see the [Sentence Transformers documentation](https://www.sbert.net/docs/pretrained_models.html).
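
If you embed btmcp as a library instead of launching the CLI, the same two options are exposed on the constructors this PR adds (a minimal sketch; it assumes the `btmcp.spec_server` import path implied by the file layout below):

```python
from btmcp.spec_server import SpecServer

# Equivalent to BTMCP_ENABLE_SEMANTIC=true BTMCP_MODEL_NAME=all-mpnet-base-v2
server = SpecServer(
    enable_semantic=True,
    model_name="all-mpnet-base-v2",
)
```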

---

@@ -547,7 +577,7 @@ PDF Files (specs/)
• Tables & Figures
→ Indexer:
• BM25 tokenization
• Semantic embeddings (all-MiniLM-L6-v2)
• Semantic embeddings (configurable model, default: all-MiniLM-L6-v2)
→ Cache (pickle):
• BM25 index
• Embeddings
src/btmcp/indexer.py: 23 changes (17 additions, 6 deletions)
@@ -29,11 +29,17 @@
class Indexer:
"""Index PDF content with BM25 keyword search and semantic search."""

def __init__(self, enable_semantic: bool = True):
def __init__(
self,
enable_semantic: bool = True,
model_name: str = "all-MiniLM-L6-v2",
):
"""Initialize indexer.

:param enable_semantic: Enable semantic search with embeddings (if available)
:type enable_semantic: bool
:param model_name: Name of the sentence-transformers model to use
:type model_name: str
"""
self.chunks: list[dict[str, Any]] = []
self.bm25: BM25Okapi | None = None
@@ -43,15 +49,16 @@ def __init__(self, enable_semantic: bool = True):
self.enable_semantic = enable_semantic and SEMANTIC_AVAILABLE
self.embeddings: np.ndarray | None = None
self.embedding_model: SentenceTransformer | None = None
self.model_name = model_name

# Metadata extraction
self.metadata_extractor = MetadataExtractor()

if self.enable_semantic:
if SEMANTIC_AVAILABLE and SentenceTransformer is not None:
# Load lightweight model (80MB, 384 dimensions)
self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
logger.info("Semantic search enabled with all-MiniLM-L6-v2")
# Load the specified model
self.embedding_model = SentenceTransformer(model_name)
logger.info(f"Semantic search enabled with {model_name}")
else:
logger.warning(
"Semantic search requested but sentence-transformers not available"
@@ -547,6 +554,7 @@ def save_cache(self, cache_path: Path | str) -> None:
"bm25": self.bm25,
"enable_semantic": self.enable_semantic,
"embeddings": self.embeddings,
"model_name": self.model_name,
}

with cache_path.open("wb") as f:
@@ -576,14 +584,17 @@ def load_cache(self, cache_path: Path | str) -> None:
if "enable_semantic" in cache_data:
self.enable_semantic = cache_data["enable_semantic"]
print(f" Semantic search: {self.enable_semantic}")
if "model_name" in cache_data:
self.model_name = cache_data["model_name"]
print(f" Model name: {self.model_name}")
if "embeddings" in cache_data:
self.embeddings = cache_data["embeddings"]
if self.embeddings is not None:
print(f" Loaded embeddings: {self.embeddings.shape}")
# Reinitialize embedding model if we have embeddings
if self.embeddings is not None and self.embedding_model is None:
print(" Loading embedding model...")
self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
print(f" Loading embedding model: {self.model_name}...")
self.embedding_model = SentenceTransformer(self.model_name)

def is_cache_fresh(
self, cache_path: Path | str, pdf_paths: Sequence[Path | str]
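Taken together, `save_cache` and `load_cache` now round-trip the model name, which the new tests at the bottom of this PR exercise. A condensed sketch of that flow (the sample page text and cache path are illustrative):

```python
from btmcp.indexer import Indexer

# Build an index with a non-default model and persist it.
indexer = Indexer(enable_semantic=True, model_name="all-MiniLM-L12-v2")
chunks = indexer.create_chunks(
    [{"page": 1, "text": "GATT Service documentation."}], pdf_name="test.pdf"
)
indexer.build_index(chunks)
indexer.save_cache(".cache/index.cache")

# A fresh Indexer picks the model name back up from the cache and
# reloads the matching sentence-transformers model.
restored = Indexer()
restored.load_cache(".cache/index.cache")
assert restored.model_name == "all-MiniLM-L12-v2"
```
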
src/btmcp/server.py: 25 changes (23 additions, 2 deletions)
@@ -52,6 +52,23 @@ def get_cache_path() -> Path:
return cache_dir / "index.cache"


def get_semantic_config() -> dict[str, str | bool]:
"""Get semantic search configuration from environment variables.

:return: Configuration dictionary with enable_semantic and model_name
:rtype: dict[str, str | bool]
"""
enable_semantic_env = os.getenv("BTMCP_ENABLE_SEMANTIC", "true").lower()
enable_semantic = enable_semantic_env in ("true", "1", "yes")

model_name = os.getenv("BTMCP_MODEL_NAME", "all-MiniLM-L6-v2")

return {
"enable_semantic": enable_semantic,
"model_name": model_name,
}


def get_security_settings() -> TransportSecuritySettings:
"""Get transport security settings for DNS rebinding protection.

@@ -105,6 +122,7 @@ def validate_transport(transport: str) -> Literal["stdio", "sse", "streamable-http"]:

# Get server configuration
config = get_server_config()
semantic_config = get_semantic_config()

# Create MCP server instance with security settings and configured host/port
mcp = FastMCP(
@@ -114,8 +132,11 @@ def validate_transport(transport: str) -> Literal["stdio", "sse", "streamable-http"]:
transport_security=get_security_settings(),
)

# Initialize spec server
spec_server = SpecServer()
# Initialize spec server with semantic configuration
spec_server = SpecServer(
enable_semantic=bool(semantic_config["enable_semantic"]),
model_name=str(semantic_config["model_name"]),
)


@mcp.tool()
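One behavior of `get_semantic_config` worth calling out: only `true`, `1`, or `yes` (case-insensitive) enable semantic search, and any other value disables it. For example:

```bash
# Each of these enables semantic search
BTMCP_ENABLE_SEMANTIC=true btmcp-server
BTMCP_ENABLE_SEMANTIC=1 btmcp-server
BTMCP_ENABLE_SEMANTIC=YES btmcp-server

# Any other value, including "false", disables it
BTMCP_ENABLE_SEMANTIC=false btmcp-server
```
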
src/btmcp/spec_server.py: 24 changes (20 additions, 4 deletions)
@@ -22,11 +22,25 @@ class PdfInfo(TypedDict):
class SpecServer:
"""Server for loading and searching Bluetooth specifications."""

def __init__(self) -> None:
"""Initialize spec server with indexer."""
self.indexer = Indexer()
def __init__(
self,
enable_semantic: bool = True,
model_name: str = "all-MiniLM-L6-v2",
) -> None:
"""Initialize spec server with indexer.

:param enable_semantic: Enable semantic search with embeddings (if available)
:type enable_semantic: bool
:param model_name: Name of the sentence-transformers model to use
:type model_name: str
"""
self.indexer = Indexer(
enable_semantic=enable_semantic, model_name=model_name
)
self.pdf_loader = PDFLoader()
self.specs_dir: Path | None = None
self.enable_semantic = enable_semantic
self.model_name = model_name

def load_pages(self, pages: list[dict[str, Any]], pdf_name: str) -> None:
"""Load PDF pages and build search index.
@@ -153,7 +167,9 @@ def rebuild_index(self) -> str:
return "No PDF files found in specs directory"

# Clear current index
self.indexer = Indexer()
self.indexer = Indexer(
enable_semantic=self.enable_semantic, model_name=self.model_name
)

# Reload all PDFs
total_pages = 0
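Because `SpecServer` now remembers its constructor arguments, `rebuild_index` recreates the `Indexer` with the same settings rather than falling back to the defaults. A sketch (assumes specs have already been loaded from a directory):

```python
from btmcp.spec_server import SpecServer

server = SpecServer(enable_semantic=True, model_name="all-mpnet-base-v2")
# ... load specs into the server, then later:
server.rebuild_index()  # re-indexes with all-mpnet-base-v2, not the default
```
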
tests/test_indexer.py: 69 changes (69 additions, 0 deletions)
@@ -847,3 +847,72 @@ def test_indexer_handles_pages_without_section_headers():

# Second chunk has section
assert chunks[1]["metadata"]["section"] == "4.2.1"


def test_indexer_custom_model_name():
"""Test that custom model name can be specified."""
# Use a different smaller model for testing
custom_model = "all-MiniLM-L12-v2"
indexer = Indexer(enable_semantic=True, model_name=custom_model)

assert indexer.model_name == custom_model
assert indexer.enable_semantic is True
# Model should be loaded with custom name
assert indexer.embedding_model is not None


def test_indexer_default_model_name():
"""Test that default model name is used when not specified."""
indexer = Indexer(enable_semantic=True)

assert indexer.model_name == "all-MiniLM-L6-v2"
assert indexer.embedding_model is not None


def test_indexer_model_name_persists_in_cache(tmp_path):
"""Test that model name is saved to and loaded from cache."""
custom_model = "all-MiniLM-L12-v2"
indexer1 = Indexer(enable_semantic=True, model_name=custom_model)

pages = [
{"page": 1, "text": "GATT Service documentation."},
]

chunks = indexer1.create_chunks(pages, pdf_name="test.pdf")
indexer1.build_index(chunks)

# Save cache
cache_file = tmp_path / "index.cache"
indexer1.save_cache(cache_file)

# Load into new indexer with different default model
indexer2 = Indexer(enable_semantic=True, model_name="different-model")
indexer2.load_cache(cache_file)

# Model name should be restored from cache
assert indexer2.model_name == custom_model


def test_indexer_model_name_used_when_loading_embeddings(tmp_path):
"""Test that correct model is loaded when restoring embeddings from cache."""
custom_model = "all-MiniLM-L12-v2"
indexer1 = Indexer(enable_semantic=True, model_name=custom_model)

pages = [
{"page": 1, "text": "GATT Service documentation."},
]

chunks = indexer1.create_chunks(pages, pdf_name="test.pdf")
indexer1.build_index(chunks)

# Save cache
cache_file = tmp_path / "index.cache"
indexer1.save_cache(cache_file)

# Load into new indexer without initializing the model first
indexer2 = Indexer(enable_semantic=False) # Start with semantic disabled
indexer2.load_cache(cache_file)

# Model should be loaded with the cached model name
assert indexer2.model_name == custom_model
assert indexer2.embedding_model is not None
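
To run only the new tests (assuming a standard pytest setup; all four new test names contain `model_name`):

```bash
pytest tests/test_indexer.py -k "model_name" -v
```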