Feat: fix-issue-336: Implement In-Tree Embedding Similarity Matching (#606)

Sophie8 · rootfs · web-flow · commit e59c52ea7dbf · 2025-11-07T20:15:58.000-05:00
* fix-issue-336

Signed-off-by: Sophie8 &lt;sw3237@nyu.edu&gt;

* fix-issue-336

Signed-off-by: Sophie8 &lt;sw3237@nyu.edu&gt;

* fix-issue-336: update classifier name

Signed-off-by: Sophie8 &lt;sw3237@nyu.edu&gt;

* fix-issue-336: update unit test

Signed-off-by: Sophie8 &lt;sw3237@nyu.edu&gt;

---------

Signed-off-by: Sophie8 &lt;sw3237@nyu.edu&gt;
Co-authored-by: Huamin Chen &lt;rootfs@users.noreply.github.com&gt;
diff --git a/src/semantic-router/pkg/classification/classifier.go b/src/semantic-router/pkg/classification/classifier.go
@@ -198,13 +198,15 @@ type PIIAnalysisResult struct {
 // Classifier handles text classification, model selection, and jailbreak detection functionality
 type Classifier struct {
 	// Dependencies - In-tree classifiers
-	categoryInitializer  CategoryInitializer
-	categoryInference    CategoryInference
-	jailbreakInitializer JailbreakInitializer
-	jailbreakInference   JailbreakInference
-	piiInitializer       PIIInitializer
-	piiInference         PIIInference
-	keywordClassifier    *KeywordClassifier
+	categoryInitializer         CategoryInitializer
+	categoryInference           CategoryInference
+	jailbreakInitializer        JailbreakInitializer
+	jailbreakInference          JailbreakInference
+	piiInitializer              PIIInitializer
+	piiInference                PIIInference
+	keywordClassifier           *KeywordClassifier
+	keywordEmbeddingInitializer EmbeddingClassifierInitializer
+	keywordEmbeddingClassifier  *EmbeddingClassifier
 
 	// Dependencies - MCP-based classifiers
 	mcpCategoryInitializer MCPCategoryInitializer
@@ -254,6 +256,13 @@ func withKeywordClassifier(keywordClassifier *KeywordClassifier) option {
 	}
 }
 
+// withKeywordEmbeddingClassifier returns an option that stores the embedding initializer and classifier on the Classifier.
+func withKeywordEmbeddingClassifier(keywordEmbeddingInitializer EmbeddingClassifierInitializer, keywordEmbeddingClassifier *EmbeddingClassifier) option {
+	return func(c *Classifier) {
+		c.keywordEmbeddingInitializer, c.keywordEmbeddingClassifier = keywordEmbeddingInitializer, keywordEmbeddingClassifier
+	}
+}
+
 // initModels initializes the models for the classifier
 func initModels(classifier *Classifier) (*Classifier, error) {
 	// Initialize either in-tree OR MCP-based category classifier
@@ -279,6 +288,12 @@ func initModels(classifier *Classifier) (*Classifier, error) {
 		}
 	}
 
+	if classifier.IsKeywordEmbeddingClassifierEnabled() {
+		if err := classifier.initializeKeywordEmbeddingClassifier(); err != nil {
+			return nil, err
+		}
+	}
+
 	return classifier, nil
 }
 
@@ -320,6 +335,16 @@ func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, p
 		options = append(options, withKeywordClassifier(keywordClassifier))
 	}
 
+	// Add keyword embedding classifier if configured
+	if len(cfg.EmbeddingRules) > 0 {
+		keywordEmbeddingClassifier, err := NewEmbeddingClassifier(cfg.EmbeddingRules)
+		if err != nil {
+			logging.Errorf("Failed to create keyword embedding classifier: %v", err)
+			return nil, err
+		}
+		options = append(options, withKeywordEmbeddingClassifier(createEmbeddingInitializer(), keywordEmbeddingClassifier))
+	}
+
 	// Add in-tree classifier if configured
 	if cfg.CategoryModel.ModelID != "" {
 		options = append(options, withCategory(categoryMapping, createCategoryInitializer(cfg.UseModernBERT), createCategoryInference(cfg.UseModernBERT)))
@@ -369,7 +394,17 @@ func (c *Classifier) ClassifyCategory(text string) (string, float64, error) {
 			return category, confidence, nil
 		}
 	}
-
+	// TODO: a more sophisticated fusion engine needs to be designed and implemented to combine the classifiers' results
+	// Try embedding based similarity classification if properly configured
+	if c.keywordEmbeddingClassifier != nil {
+		category, confidence, err := c.keywordEmbeddingClassifier.Classify(text)
+		if err != nil {
+			return "", 0.0, err
+		}
+		if category != "" {
+			return category, confidence, nil
+		}
+	}
 	// Try in-tree first if properly configured
 	if c.IsCategoryEnabled() && c.categoryInference != nil {
 		return c.classifyCategoryInTree(text)
diff --git a/src/semantic-router/pkg/classification/classifier_test.go b/src/semantic-router/pkg/classification/classifier_test.go
@@ -3579,3 +3579,107 @@ func BenchmarkUnifiedClassifier_BatchSizeComparison(b *testing.B) {
 		}
 	})
 }
+
+// EmbeddingClassifier unit tests: calculateSimilarityBatch is stubbed per spec, so no embedding model is loaded.
+var _ = Describe("EmbeddingClassifier", func() {
+	var origCalculate func(string, []string, int, string, int) (*candle_binding.BatchSimilarityOutput, error)
+
+	BeforeEach(func() {
+		origCalculate = calculateSimilarityBatch // remember the current implementation so AfterEach can restore it
+	})
+
+	AfterEach(func() {
+		calculateSimilarityBatch = origCalculate
+	})
+
+	It("classifies with mean aggregation", func() {
+		calculateSimilarityBatch = func(query string, candidates []string, topK int, modelType string, targetDim int) (*candle_binding.BatchSimilarityOutput, error) {
+			return &candle_binding.BatchSimilarityOutput{Matches: []candle_binding.BatchSimilarityMatch{{Index: 0, Similarity: 0.9}, {Index: 1, Similarity: 0.8}, {Index: 2, Similarity: 0.7}}}, nil
+		}
+
+		rules := []config.EmbeddingRule{{
+			Category:                  "cat1",
+			Keywords:                  []string{"science", "math"},
+			AggregationMethodConfiged: config.AggregationMethodMean,
+			SimilarityThreshold:       0.8,
+			Model:                     "auto",
+			Dimension:                 768,
+		}}
+
+		clf, err := NewEmbeddingClassifier(rules)
+		Expect(err).ToNot(HaveOccurred())
+
+		cat, score, err := clf.Classify("some text")
+		Expect(err).ToNot(HaveOccurred())
+		Expect(cat).To(Equal("cat1"))
+		Expect(score).To(BeNumerically("~", 0.8, 1e-6)) // mean of the stubbed scores: (0.9+0.8+0.7)/3
+	})
+
+	It("classifies with max aggregation", func() {
+		calculateSimilarityBatch = func(query string, candidates []string, topK int, modelType string, targetDim int) (*candle_binding.BatchSimilarityOutput, error) {
+			return &candle_binding.BatchSimilarityOutput{Matches: []candle_binding.BatchSimilarityMatch{{Index: 0, Similarity: 0.4}, {Index: 1, Similarity: 0.6}}}, nil
+		}
+
+		rules := []config.EmbeddingRule{{
+			Category:                  "cat2",
+			Keywords:                  []string{"x", "y"},
+			AggregationMethodConfiged: config.AggregationMethodMax,
+			SimilarityThreshold:       0.5,
+			Model:                     "auto",
+			Dimension:                 512,
+		}}
+
+		clf, err := NewEmbeddingClassifier(rules)
+		Expect(err).ToNot(HaveOccurred())
+
+		cat, score, err := clf.Classify("other text")
+		Expect(err).ToNot(HaveOccurred())
+		Expect(cat).To(Equal("cat2"))
+		Expect(score).To(BeNumerically("~", 0.6, 1e-6)) // largest stubbed score
+	})
+
+	It("classifies with any aggregation", func() {
+		calculateSimilarityBatch = func(query string, candidates []string, topK int, modelType string, targetDim int) (*candle_binding.BatchSimilarityOutput, error) {
+			return &candle_binding.BatchSimilarityOutput{Matches: []candle_binding.BatchSimilarityMatch{{Index: 0, Similarity: 0.2}, {Index: 1, Similarity: 0.95}}}, nil
+		}
+
+		rules := []config.EmbeddingRule{{
+			Category:                  "cat3",
+			Keywords:                  []string{"p", "q"},
+			AggregationMethodConfiged: config.AggregationMethodAny,
+			SimilarityThreshold:       0.7,
+			Model:                     "auto",
+			Dimension:                 256,
+		}}
+
+		clf, err := NewEmbeddingClassifier(rules)
+		Expect(err).ToNot(HaveOccurred())
+
+		cat, score, err := clf.Classify("third text")
+		Expect(err).ToNot(HaveOccurred())
+		Expect(cat).To(Equal("cat3"))
+		Expect(score).To(BeNumerically("~", 0.7, 1e-6)) // "any" reports the rule threshold, not the 0.95 hit
+	})
+
+	It("returns error when CalculateSimilarityBatch fails", func() {
+		calculateSimilarityBatch = func(query string, candidates []string, topK int, modelType string, targetDim int) (*candle_binding.BatchSimilarityOutput, error) {
+			return nil, errors.New("external failure")
+		}
+
+		rules := []config.EmbeddingRule{{
+			Category:                  "cat4",
+			Keywords:                  []string{"z"},
+			AggregationMethodConfiged: config.AggregationMethodMean,
+			SimilarityThreshold:       0.1,
+			Model:                     "auto",
+			Dimension:                 768,
+		}}
+
+		clf, err := NewEmbeddingClassifier(rules)
+		Expect(err).ToNot(HaveOccurred())
+
+		_, _, err = clf.Classify("will error")
+		Expect(err).To(HaveOccurred())
+		Expect(err.Error()).To(ContainSubstring("failed to calculate batch similarity"))
+	})
+})
diff --git a/src/semantic-router/pkg/classification/embedding_classifier.go b/src/semantic-router/pkg/classification/embedding_classifier.go
@@ -0,0 +1,152 @@
+package classification
+
+import (
+	"fmt"
+
+	candle_binding "github.com/vllm-project/semantic-router/candle-binding"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
+)
+
+// calculateSimilarityBatch is the similarity backend called by
+// EmbeddingClassifier.matches. It defaults to
+// candle_binding.CalculateSimilarityBatch and is a variable so tests can override it.
+var calculateSimilarityBatch = candle_binding.CalculateSimilarityBatch
+
+// EmbeddingClassifierInitializer abstracts the model set-up step that Classifier runs before embedding based classification.
+type EmbeddingClassifierInitializer interface {
+	Init(qwen3ModelPath string, gemmaModelPath string, useCPU bool) error // a non-nil error aborts initModels
+}
+
+type ExternalModelBasedEmbeddingInitializer struct{}
+
+// Init hands both model paths and the CPU flag to candle_binding.InitEmbeddingModels and logs the paths on success.
+func (c *ExternalModelBasedEmbeddingInitializer) Init(qwen3ModelPath string, gemmaModelPath string, useCPU bool) error {
+	if err := candle_binding.InitEmbeddingModels(qwen3ModelPath, gemmaModelPath, useCPU); err != nil {
+		return err
+	}
+	logging.Infof("Initialized KeywordEmbedding classifier with qwen3 model path %q and gemma model path %s", qwen3ModelPath, gemmaModelPath)
+	return nil
+}
+
+// createEmbeddingInitializer returns the embedding initializer; it takes no configuration and always yields an ExternalModelBasedEmbeddingInitializer.
+func createEmbeddingInitializer() EmbeddingClassifierInitializer {
+	return &ExternalModelBasedEmbeddingInitializer{}
+}
+
+type EmbeddingClassifier struct {
+	rules []config.EmbeddingRule // rules evaluated by Classify
+}
+
+// NewEmbeddingClassifier creates an EmbeddingClassifier over cfgRules; the returned error is currently always nil.
+func NewEmbeddingClassifier(cfgRules []config.EmbeddingRule) (*EmbeddingClassifier, error) {
+	return &EmbeddingClassifier{rules: cfgRules}, nil
+}
+
+// IsKeywordEmbeddingClassifierEnabled reports whether the configuration contains at least one embedding rule.
+func (c *Classifier) IsKeywordEmbeddingClassifierEnabled() bool {
+	return len(c.Config.EmbeddingRules) > 0
+}
+
+// initializeKeywordEmbeddingClassifier calls the configured initializer with the inline model paths; it errors when no rules or no initializer are set.
+func (c *Classifier) initializeKeywordEmbeddingClassifier() error {
+	if !c.IsKeywordEmbeddingClassifierEnabled() || c.keywordEmbeddingInitializer == nil {
+		return fmt.Errorf("keyword embedding similarity match is not properly configured")
+	}
+	return c.keywordEmbeddingInitializer.Init(c.Config.InlineModels.Qwen3ModelPath, c.Config.InlineModels.GemmaModelPath, c.Config.InlineModels.EmbeddingModels.UseCPU)
+}
+
+// Classify evaluates every rule against text and returns the category and score of the
+// best-scoring matching rule. It returns ("", 0, nil) when nothing matches and stops at the first rule error.
+func (c *EmbeddingClassifier) Classify(text string) (string, float64, error) {
+	var bestScore float32
+	var mostMatchedCategory string
+	for _, rule := range c.rules {
+		matched, aggregatedScore, err := c.matches(text, rule)
+		if err != nil {
+			return "", 0.0, err
+		}
+		if !matched {
+			continue
+		}
+		// Use a float verb: aggregatedScore is a float32, and %s would log it as %!s(float32=...).
+		// matches rejects rules without keywords, so a matched rule always has keywords to report.
+		logging.Infof("Keyword-based embedding similarity classification matched category %q with keywords: %v, confidence score %.4f", rule.Category, rule.Keywords, aggregatedScore)
+		// Strictly greater: on a tie the earlier rule in the list keeps the win.
+		if aggregatedScore > bestScore {
+			bestScore = aggregatedScore
+			mostMatchedCategory = rule.Category
+		}
+	}
+	return mostMatchedCategory, float64(bestScore), nil
+}
+
+// matches evaluates a single rule against text. It scores text against every
+// keyword via calculateSimilarityBatch, combines the scores with the rule's
+// aggregation method, and returns whether the rule matched, the resulting
+// score, and an error for invalid input, a backend failure, or an unknown method.
+func (c *EmbeddingClassifier) matches(text string, rule config.EmbeddingRule) (bool, float32, error) {
+	// Validate input
+	if text == "" {
+		return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: query must be provided")
+	}
+	if len(rule.Keywords) == 0 {
+		return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: keywords must be provided")
+	}
+	// Set defaults
+	if rule.Dimension == 0 {
+		rule.Dimension = 768 // Default to full dimension
+	}
+	// NOTE: rule is a by-value copy and these priorities are not passed to calculateSimilarityBatch below.
+	if rule.Model == "auto" && rule.QualityPriority == 0 && rule.LatencyPriority == 0 {
+		rule.QualityPriority = 0.5
+		rule.LatencyPriority = 0.5
+	}
+
+	// Validate dimension
+	validDimensions := map[int]bool{128: true, 256: true, 512: true, 768: true, 1024: true}
+	if !validDimensions[rule.Dimension] {
+		return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: dimension must be one of: 128, 256, 512, 768, 1024 (got %d)", rule.Dimension)
+	}
+	// Calculate batch similarity
+	result, err := calculateSimilarityBatch(
+		text,
+		rule.Keywords,
+		0, // return scores for all the keywords
+		rule.Model,
+		rule.Dimension,
+	)
+	if err != nil {
+		return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: failed to calculate batch similarity: %w", err)
+	}
+	// Check for matches based on the aggregation method
+	switch rule.AggregationMethodConfiged {
+	case config.AggregationMethodMean:
+		// An empty result would make the mean 0/0 (NaN); report it as "no match" with score 0.
+		if len(result.Matches) == 0 {
+			return false, 0.0, nil
+		}
+		var aggregatedScore float32
+		for _, match := range result.Matches {
+			aggregatedScore += match.Similarity
+		}
+		aggregatedScore /= float32(len(result.Matches))
+		return aggregatedScore >= rule.SimilarityThreshold, aggregatedScore, nil
+	case config.AggregationMethodMax:
+		var aggregatedScore float32
+		for _, match := range result.Matches {
+			if match.Similarity > aggregatedScore {
+				aggregatedScore = match.Similarity
+			}
+		}
+		return aggregatedScore >= rule.SimilarityThreshold, aggregatedScore, nil
+	case config.AggregationMethodAny:
+		// NOTE: a hit reports the rule threshold as its score, not the matching keyword's similarity.
+		for _, match := range result.Matches {
+			if match.Similarity >= rule.SimilarityThreshold {
+				return true, rule.SimilarityThreshold, nil
+			}
+		}
+		return false, 0.0, nil
+	}
+	return false, 0.0, fmt.Errorf("keyword-based embedding similarity classification: unsupported keyword rule aggregation method: %q", rule.AggregationMethodConfiged)
+}
diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go
@@ -81,6 +81,9 @@ type IntelligentRouting struct {
 	// Keyword-based classification rules
 	KeywordRules []KeywordRule `yaml:"keyword_rules,omitempty"`
 
+	// Embedding-based classification rules
+	EmbeddingRules []EmbeddingRule `yaml:"embedding_rules,omitempty"`
+
 	// Categories for routing queries
 	Categories []Category `yaml:"categories"`
 
@@ -199,6 +202,27 @@ type KeywordRule struct {
 	CaseSensitive bool     `yaml:"case_sensitive"`
 }
 
+// AggregationMethod selects how per-keyword similarity scores are combined when an EmbeddingRule is evaluated.
+type AggregationMethod string
+
+const (
+	AggregationMethodMean AggregationMethod = "mean" // average of all keyword similarities is compared to the threshold
+	AggregationMethodMax  AggregationMethod = "max"  // highest keyword similarity is compared to the threshold
+	AggregationMethodAny  AggregationMethod = "any"  // matches as soon as one keyword similarity reaches the threshold
+)
+
+// EmbeddingRule defines a keyword embedding similarity rule. NOTE(review): the YAML key "aggregation_mathod" is misspelled but kept so existing configs keep loading.
+type EmbeddingRule struct {
+	Category                  string            `yaml:"category"`
+	SimilarityThreshold       float32           `yaml:"threshold"`
+	Keywords                  []string          `yaml:"keywords"`
+	AggregationMethodConfiged AggregationMethod `yaml:"aggregation_mathod"`
+	Model                     string            `yaml:"model,omitempty" json:"model,omitempty"`                       // "auto" (default), "qwen3", "gemma"
+	Dimension                 int               `yaml:"dimension,omitempty" json:"dimension,omitempty"`               // Target dimension: 768 (default), 512, 256, 128
+	QualityPriority           float32           `yaml:"quality_priority,omitempty" json:"quality_priority,omitempty"` // 0.0-1.0, only for "auto" model
+	LatencyPriority           float32           `yaml:"latency_priority,omitempty" json:"latency_priority,omitempty"` // 0.0-1.0, only for "auto" model
+}
+
 // APIConfig represents configuration for API endpoints
 type APIConfig struct {
 	// Batch classification configuration (zero-config auto-discovery)