Skip to content

Commit 4fc69b5

Browse files
authored
chore: fix milvus cache unit test (#612)
* chore: fix milvus cache unit test
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* fix pre-commit
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* fix dashboard container build
  Signed-off-by: Huamin Chen <hchen@redhat.com>
---------
Signed-off-by: Huamin Chen <hchen@redhat.com>
1 parent 025e69a commit 4fc69b5

File tree

2 files changed

+65
-13
lines changed

2 files changed

+65
-13
lines changed

dashboard/backend/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Stage 1: Build frontend with Node.js
22
FROM node:20-alpine AS frontend-builder
33
WORKDIR /app/frontend
4-
COPY dashboard/frontend/package.json dashboard/frontend/tsconfig.json dashboard/frontend/tsconfig.node.json dashboard/frontend/vite.config.ts ./
4+
COPY dashboard/frontend/package.json dashboard/frontend/package-lock.json dashboard/frontend/tsconfig.json dashboard/frontend/tsconfig.node.json dashboard/frontend/vite.config.ts ./
55
COPY dashboard/frontend/src ./src
66
COPY dashboard/frontend/public ./public
77
COPY dashboard/frontend/index.html ./

src/semantic-router/pkg/cache/milvus_cache.go

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -728,8 +728,21 @@ func (c *MilvusCache) FindSimilarWithThreshold(model string, query string, thres
728728
}
729729

730730
// Cache Hit
731+
// Milvus automatically includes the primary key in search results but order is non-deterministic
732+
// Check which field is the response_body by detecting if field[0] is an MD5 hash
733+
responseBodyFieldIndex := 0
734+
if len(searchResult[0].Fields) > 1 {
735+
if testCol, ok := searchResult[0].Fields[0].(*entity.ColumnVarChar); ok && testCol.Len() > 0 {
736+
testVal := testCol.Data()[0]
737+
// If field[0] is exactly 32 hex chars, it's the ID hash, so response_body is in field[1]
738+
if len(testVal) == 32 && isHexString(testVal) {
739+
responseBodyFieldIndex = 1
740+
}
741+
}
742+
}
743+
731744
var responseBody []byte
732-
responseBodyColumn, ok := searchResult[0].Fields[0].(*entity.ColumnVarChar)
745+
responseBodyColumn, ok := searchResult[0].Fields[responseBodyFieldIndex].(*entity.ColumnVarChar)
733746
if ok && responseBodyColumn.Len() > 0 {
734747
responseBody = []byte(responseBodyColumn.Data()[0])
735748
}
@@ -782,21 +795,34 @@ func (c *MilvusCache) GetAllEntries(ctx context.Context) ([]string, [][]float32,
782795
return nil, nil, fmt.Errorf("milvus query all failed: %w", err)
783796
}
784797

785-
if len(queryResult) < 2 {
798+
// Milvus automatically includes the primary key but column order may vary
799+
// We requested ["request_id", embedding_field], so we expect 2-3 columns
800+
// If 3 or more columns come back, this code assumes the auto-included primary key is column 0 and shifts both indices by one
801+
requestIDColIndex := 0
802+
embeddingColIndex := 1
803+
expectedMinCols := 2
804+
805+
if len(queryResult) >= 3 {
806+
// Primary key was auto-included, adjust indices
807+
requestIDColIndex = 1
808+
embeddingColIndex = 2
809+
}
810+
811+
if len(queryResult) < expectedMinCols {
786812
logging.Infof("MilvusCache.GetAllEntries: no entries found or incomplete result")
787813
return []string{}, [][]float32{}, nil
788814
}
789815

790-
// Extract request IDs (first column)
791-
requestIDColumn, ok := queryResult[0].(*entity.ColumnVarChar)
816+
// Extract request IDs
817+
requestIDColumn, ok := queryResult[requestIDColIndex].(*entity.ColumnVarChar)
792818
if !ok {
793-
return nil, nil, fmt.Errorf("unexpected request_id column type: %T", queryResult[0])
819+
return nil, nil, fmt.Errorf("unexpected request_id column type: %T", queryResult[requestIDColIndex])
794820
}
795821

796-
// Extract embeddings (second column)
797-
embeddingColumn, ok := queryResult[1].(*entity.ColumnFloatVector)
822+
// Extract embeddings
823+
embeddingColumn, ok := queryResult[embeddingColIndex].(*entity.ColumnFloatVector)
798824
if !ok {
799-
return nil, nil, fmt.Errorf("unexpected embedding column type: %T", queryResult[1])
825+
return nil, nil, fmt.Errorf("unexpected embedding column type: %T", queryResult[embeddingColIndex])
800826
}
801827

802828
if requestIDColumn.Len() != embeddingColumn.Len() {
@@ -830,6 +856,16 @@ func (c *MilvusCache) GetAllEntries(ctx context.Context) ([]string, [][]float32,
830856
return requestIDs, embeddings, nil
831857
}
832858

859+
// isHexString reports whether every character in s is a hexadecimal digit
// (0-9, a-f, or A-F). It returns false at the first character outside those
// ranges. It does not check length: an empty string yields true because the
// loop never runs, so callers that want a fixed-width hash must test len(s)
// themselves.
860+
func isHexString(s string) bool {
861+
for _, c := range s {
862+
if (c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F') {
863+
return false
864+
}
865+
}
866+
return true
867+
}
868+
833869
// GetByID retrieves a document from Milvus by its request ID
834870
// This is much more efficient than FindSimilar when you already know the ID
835871
// Used by hybrid cache to fetch documents after local HNSW search
@@ -863,12 +899,28 @@ func (c *MilvusCache) GetByID(ctx context.Context, requestID string) ([]byte, er
863899
return nil, fmt.Errorf("document not found: %s", requestID)
864900
}
865901

866-
// Extract response body (first column since we only requested "response_body")
867-
responseBodyColumn, ok := queryResult[0].(*entity.ColumnVarChar)
902+
// Milvus automatically includes the primary key but the column order is non-deterministic
903+
// We need to find which column is the response_body by checking which is NOT the primary key (32-char hash)
904+
responseBodyColIndex := 0
905+
if len(queryResult) > 1 {
906+
// Check if column[0] looks like an MD5 hash (32 hex chars)
907+
if testCol, ok := queryResult[0].(*entity.ColumnVarChar); ok && testCol.Len() > 0 {
908+
testVal, _ := testCol.ValueByIdx(0)
909+
// If it's exactly 32 chars and all hex, it's likely the ID hash
910+
if len(testVal) == 32 && isHexString(testVal) {
911+
responseBodyColIndex = 1 // response_body is in column 1
912+
} else {
913+
responseBodyColIndex = 0 // response_body is in column 0
914+
}
915+
}
916+
}
917+
918+
// Extract response body
919+
responseBodyColumn, ok := queryResult[responseBodyColIndex].(*entity.ColumnVarChar)
868920
if !ok {
869-
logging.Debugf("MilvusCache.GetByID: unexpected response_body column type: %T", queryResult[0])
921+
logging.Debugf("MilvusCache.GetByID: unexpected response_body column type: %T", queryResult[responseBodyColIndex])
870922
metrics.RecordCacheOperation("milvus", "get_by_id", "error", time.Since(start).Seconds())
871-
return nil, fmt.Errorf("invalid response_body column type: %T", queryResult[0])
923+
return nil, fmt.Errorf("invalid response_body column type: %T", queryResult[responseBodyColIndex])
872924
}
873925

874926
if responseBodyColumn.Len() == 0 {

0 commit comments

Comments
 (0)