Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions src/helpers/assertVectorSearchFilterFieldsAreIndexed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,22 @@ export type VectorSearchIndex = {
}
>;
};
type: "vectorSearch";
};

export function assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger,
}: {
searchIndexes: VectorSearchIndex[];
vectorSearchIndexes: VectorSearchIndex[];
pipeline: Record<string, unknown>[];
logger: CompositeLogger;
}): void {
const searchIndexesWithFilterFields = searchIndexes.reduce<Record<string, string[]>>(
(indexFieldMap, searchIndex) => {
const searchIndexesWithFilterFields = vectorSearchIndexes
// Ensure we only process vector search indexes and not lexical search ones
.filter((index) => index.type === "vectorSearch")
.reduce<Record<string, string[]>>((indexFieldMap, searchIndex) => {
const filterFields = searchIndex.latestDefinition.fields
.map<string | undefined>((field) => {
return field.type === "filter" ? field.path : undefined;
Expand All @@ -42,9 +45,7 @@ export function assertVectorSearchFilterFieldsAreIndexed({

indexFieldMap[searchIndex.name] = filterFields;
return indexFieldMap;
},
{}
);
}, {});
for (const stage of pipeline) {
if ("$vectorSearch" in stage) {
const { $vectorSearch: vectorSearchStage } = stage as z.infer<typeof VectorSearchStage>;
Expand Down
8 changes: 5 additions & 3 deletions src/tools/mongodb/read/aggregate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ import {

export const AggregateArgs = {
pipeline: z.array(z.union([AnyVectorSearchStage, VectorSearchStage])).describe(
`An array of aggregation stages to execute.
`An array of aggregation stages to execute.
\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
### Usage Rules for \`$vectorSearch\`
- **Unset embeddings:**
- **Unset embeddings:**
Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**.
- **Pre-filtering:**
If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index.
Expand Down Expand Up @@ -59,7 +59,9 @@ export class AggregateTool extends MongoDBToolBase {
await this.assertOnlyUsesPermittedStages(pipeline);
if (await this.session.isSearchSupported()) {
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes: (await provider.getSearchIndexes(database, collection)) as VectorSearchIndex[],
vectorSearchIndexes: (await provider.getSearchIndexes(database, collection)).filter(
(index) => index.type === "vectorSearch"
) as VectorSearchIndex[],
pipeline,
logger: this.session.logger,
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,12 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
})),
],
},
type: "vectorSearch",
},
];

it("should not throw when all filter fields are indexed", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -219,15 +220,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
).not.toThrow();
});

it("should not throw when filter is empty", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -243,15 +244,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
).not.toThrow();
});

it("should not throw when filter is not provided", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -266,28 +267,28 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
).not.toThrow();
});

it("should not throw when pipeline has no $vectorSearch stage", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const pipeline = [{ $match: { status: "active" } }, { $limit: 10 }];

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
).not.toThrow();
});

it("should throw MongoDBError when filter field is not indexed", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -306,15 +307,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
).toThrow(MongoDBError);

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
Expand All @@ -327,7 +328,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
});

it("should throw MongoDBError with all unindexed fields listed", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -347,7 +348,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
Expand All @@ -360,7 +361,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
});

it("should handle nested $and and $or operators", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -383,15 +384,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
).not.toThrow();
});

it("should throw when nested filter contains unindexed field", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -414,7 +415,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
Expand All @@ -427,7 +428,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
});

it("should log warning when index is not found in searchIndexes", () => {
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
const pipeline = [
{
$vectorSearch: {
Expand All @@ -444,7 +445,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
];

assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
});
Expand All @@ -459,7 +460,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
});

it("should handle multiple $vectorSearch stages in pipeline", () => {
const searchIndexes = [
const vectorSearchIndexes = [
...createMockSearchIndexes("index1", ["field1", "field2"]),
...createMockSearchIndexes("index2", ["field3", "field4"]),
];
Expand Down Expand Up @@ -493,15 +494,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
).not.toThrow();
});

it("should throw on second $vectorSearch stage if it has unindexed field", () => {
const searchIndexes = [
const vectorSearchIndexes = [
...createMockSearchIndexes("index1", ["field1", "field2"]),
...createMockSearchIndexes("index2", ["field3"]),
];
Expand Down Expand Up @@ -534,7 +535,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
Expand All @@ -547,12 +548,13 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
});

it("should handle search index with no filter fields", () => {
const searchIndexes: VectorSearchIndex[] = [
const vectorSearchIndexes: VectorSearchIndex[] = [
{
name: "myIndex",
latestDefinition: {
fields: [{ type: "vector" }],
},
type: "vectorSearch",
},
];
const pipeline = [
Expand All @@ -572,7 +574,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {

expect(() =>
assertVectorSearchFilterFieldsAreIndexed({
searchIndexes,
vectorSearchIndexes,
pipeline,
logger: mockLogger,
})
Expand All @@ -583,4 +585,45 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
)
);
});

it("should ignore atlas search indexes", () => {
    // A lexical Atlas Search index: its definition carries `analyzer`/`mappings`
    // and its `type` is "search", so it does not match the VectorSearchIndex
    // shape and has to be force-cast to sit in the same list.
    const lexicalIndex = {
        name: "atlasSearchIndex",
        latestDefinition: {
            analyzer: "lucene.standard",
            mappings: {
                dynamic: false,
            },
        },
        type: "search",
    } as unknown as VectorSearchIndex;

    // One real vector search index followed by the lexical one; the validation
    // is expected to skip the latter without raising any error.
    const mixedIndexes: VectorSearchIndex[] = [
        ...createMockSearchIndexes("index1", ["field1", "field2"]),
        lexicalIndex,
    ];

    // The stage targets the vector index and filters on a field it indexes.
    const vectorSearchStage = {
        index: "index1",
        path: "embedding",
        queryVector: [1, 2, 3],
        numCandidates: 100,
        limit: 10,
        filter: {
            field1: "value",
        },
    };

    const runValidation = (): void =>
        assertVectorSearchFilterFieldsAreIndexed({
            vectorSearchIndexes: mixedIndexes,
            pipeline: [{ $vectorSearch: vectorSearchStage }],
            logger: mockLogger,
        });

    expect(runValidation).not.toThrow();
});
});
Loading