Skip to content

Commit b08f5b1

Browse files
committed
fix: don't extract fields from lexical search indexes
1 parent 2a8f602 commit b08f5b1

File tree

3 files changed: +81 additions, -35 deletions

src/helpers/assertVectorSearchFilterFieldsAreIndexed.ts

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,19 +21,22 @@ export type VectorSearchIndex = {
2121
}
2222
>;
2323
};
24+
type: "vectorSearch";
2425
};
2526

2627
export function assertVectorSearchFilterFieldsAreIndexed({
27-
searchIndexes,
28+
vectorSearchIndexes,
2829
pipeline,
2930
logger,
3031
}: {
31-
searchIndexes: VectorSearchIndex[];
32+
vectorSearchIndexes: VectorSearchIndex[];
3233
pipeline: Record<string, unknown>[];
3334
logger: CompositeLogger;
3435
}): void {
35-
const searchIndexesWithFilterFields = searchIndexes.reduce<Record<string, string[]>>(
36-
(indexFieldMap, searchIndex) => {
36+
const searchIndexesWithFilterFields = vectorSearchIndexes
37+
// Ensure we only process vector search indexes and not lexical search ones
38+
.filter((index) => index.type === "vectorSearch")
39+
.reduce<Record<string, string[]>>((indexFieldMap, searchIndex) => {
3740
const filterFields = searchIndex.latestDefinition.fields
3841
.map<string | undefined>((field) => {
3942
return field.type === "filter" ? field.path : undefined;
@@ -42,9 +45,7 @@ export function assertVectorSearchFilterFieldsAreIndexed({
4245

4346
indexFieldMap[searchIndex.name] = filterFields;
4447
return indexFieldMap;
45-
},
46-
{}
47-
);
48+
}, {});
4849
for (const stage of pipeline) {
4950
if ("$vectorSearch" in stage) {
5051
const { $vectorSearch: vectorSearchStage } = stage as z.infer<typeof VectorSearchStage>;

src/tools/mongodb/read/aggregate.ts

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,10 @@ import {
2020

2121
export const AggregateArgs = {
2222
pipeline: z.array(z.union([AnyVectorSearchStage, VectorSearchStage])).describe(
23-
`An array of aggregation stages to execute.
23+
`An array of aggregation stages to execute.
2424
\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
2525
### Usage Rules for \`$vectorSearch\`
26-
- **Unset embeddings:**
26+
- **Unset embeddings:**
2727
Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**.
2828
- **Pre-filtering:**
2929
If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index.
@@ -59,7 +59,9 @@ export class AggregateTool extends MongoDBToolBase {
5959
await this.assertOnlyUsesPermittedStages(pipeline);
6060
if (await this.session.isSearchSupported()) {
6161
assertVectorSearchFilterFieldsAreIndexed({
62-
searchIndexes: (await provider.getSearchIndexes(database, collection)) as VectorSearchIndex[],
62+
vectorSearchIndexes: (await provider.getSearchIndexes(database, collection)).filter(
63+
(index) => index.type === "vectorSearch"
64+
) as VectorSearchIndex[],
6365
pipeline,
6466
logger: this.session.logger,
6567
});

tests/unit/helpers/assertVectorSearchFilterFieldsAreIndexed.test.ts

Lines changed: 68 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -196,11 +196,12 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
196196
})),
197197
],
198198
},
199+
type: "vectorSearch",
199200
},
200201
];
201202

202203
it("should not throw when all filter fields are indexed", () => {
203-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
204+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
204205
const pipeline = [
205206
{
206207
$vectorSearch: {
@@ -219,15 +220,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
219220

220221
expect(() =>
221222
assertVectorSearchFilterFieldsAreIndexed({
222-
searchIndexes,
223+
vectorSearchIndexes,
223224
pipeline,
224225
logger: mockLogger,
225226
})
226227
).not.toThrow();
227228
});
228229

229230
it("should not throw when filter is empty", () => {
230-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
231+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
231232
const pipeline = [
232233
{
233234
$vectorSearch: {
@@ -243,15 +244,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
243244

244245
expect(() =>
245246
assertVectorSearchFilterFieldsAreIndexed({
246-
searchIndexes,
247+
vectorSearchIndexes,
247248
pipeline,
248249
logger: mockLogger,
249250
})
250251
).not.toThrow();
251252
});
252253

253254
it("should not throw when filter is not provided", () => {
254-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
255+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
255256
const pipeline = [
256257
{
257258
$vectorSearch: {
@@ -266,28 +267,28 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
266267

267268
expect(() =>
268269
assertVectorSearchFilterFieldsAreIndexed({
269-
searchIndexes,
270+
vectorSearchIndexes,
270271
pipeline,
271272
logger: mockLogger,
272273
})
273274
).not.toThrow();
274275
});
275276

276277
it("should not throw when pipeline has no $vectorSearch stage", () => {
277-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
278+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
278279
const pipeline = [{ $match: { status: "active" } }, { $limit: 10 }];
279280

280281
expect(() =>
281282
assertVectorSearchFilterFieldsAreIndexed({
282-
searchIndexes,
283+
vectorSearchIndexes,
283284
pipeline,
284285
logger: mockLogger,
285286
})
286287
).not.toThrow();
287288
});
288289

289290
it("should throw MongoDBError when filter field is not indexed", () => {
290-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
291+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
291292
const pipeline = [
292293
{
293294
$vectorSearch: {
@@ -306,15 +307,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
306307

307308
expect(() =>
308309
assertVectorSearchFilterFieldsAreIndexed({
309-
searchIndexes,
310+
vectorSearchIndexes,
310311
pipeline,
311312
logger: mockLogger,
312313
})
313314
).toThrow(MongoDBError);
314315

315316
expect(() =>
316317
assertVectorSearchFilterFieldsAreIndexed({
317-
searchIndexes,
318+
vectorSearchIndexes,
318319
pipeline,
319320
logger: mockLogger,
320321
})
@@ -327,7 +328,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
327328
});
328329

329330
it("should throw MongoDBError with all unindexed fields listed", () => {
330-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
331+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
331332
const pipeline = [
332333
{
333334
$vectorSearch: {
@@ -347,7 +348,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
347348

348349
expect(() =>
349350
assertVectorSearchFilterFieldsAreIndexed({
350-
searchIndexes,
351+
vectorSearchIndexes,
351352
pipeline,
352353
logger: mockLogger,
353354
})
@@ -360,7 +361,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
360361
});
361362

362363
it("should handle nested $and and $or operators", () => {
363-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
364+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2", "field3"]);
364365
const pipeline = [
365366
{
366367
$vectorSearch: {
@@ -383,15 +384,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
383384

384385
expect(() =>
385386
assertVectorSearchFilterFieldsAreIndexed({
386-
searchIndexes,
387+
vectorSearchIndexes,
387388
pipeline,
388389
logger: mockLogger,
389390
})
390391
).not.toThrow();
391392
});
392393

393394
it("should throw when nested filter contains unindexed field", () => {
394-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
395+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1", "field2"]);
395396
const pipeline = [
396397
{
397398
$vectorSearch: {
@@ -414,7 +415,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
414415

415416
expect(() =>
416417
assertVectorSearchFilterFieldsAreIndexed({
417-
searchIndexes,
418+
vectorSearchIndexes,
418419
pipeline,
419420
logger: mockLogger,
420421
})
@@ -427,7 +428,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
427428
});
428429

429430
it("should log warning when index is not found in searchIndexes", () => {
430-
const searchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
431+
const vectorSearchIndexes = createMockSearchIndexes("myIndex", ["field1"]);
431432
const pipeline = [
432433
{
433434
$vectorSearch: {
@@ -444,7 +445,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
444445
];
445446

446447
assertVectorSearchFilterFieldsAreIndexed({
447-
searchIndexes,
448+
vectorSearchIndexes,
448449
pipeline,
449450
logger: mockLogger,
450451
});
@@ -459,7 +460,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
459460
});
460461

461462
it("should handle multiple $vectorSearch stages in pipeline", () => {
462-
const searchIndexes = [
463+
const vectorSearchIndexes = [
463464
...createMockSearchIndexes("index1", ["field1", "field2"]),
464465
...createMockSearchIndexes("index2", ["field3", "field4"]),
465466
];
@@ -493,15 +494,15 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
493494

494495
expect(() =>
495496
assertVectorSearchFilterFieldsAreIndexed({
496-
searchIndexes,
497+
vectorSearchIndexes,
497498
pipeline,
498499
logger: mockLogger,
499500
})
500501
).not.toThrow();
501502
});
502503

503504
it("should throw on second $vectorSearch stage if it has unindexed field", () => {
504-
const searchIndexes = [
505+
const vectorSearchIndexes = [
505506
...createMockSearchIndexes("index1", ["field1", "field2"]),
506507
...createMockSearchIndexes("index2", ["field3"]),
507508
];
@@ -534,7 +535,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
534535

535536
expect(() =>
536537
assertVectorSearchFilterFieldsAreIndexed({
537-
searchIndexes,
538+
vectorSearchIndexes,
538539
pipeline,
539540
logger: mockLogger,
540541
})
@@ -547,12 +548,13 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
547548
});
548549

549550
it("should handle search index with no filter fields", () => {
550-
const searchIndexes: VectorSearchIndex[] = [
551+
const vectorSearchIndexes: VectorSearchIndex[] = [
551552
{
552553
name: "myIndex",
553554
latestDefinition: {
554555
fields: [{ type: "vector" }],
555556
},
557+
type: "vectorSearch",
556558
},
557559
];
558560
const pipeline = [
@@ -572,7 +574,7 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
572574

573575
expect(() =>
574576
assertVectorSearchFilterFieldsAreIndexed({
575-
searchIndexes,
577+
vectorSearchIndexes,
576578
pipeline,
577579
logger: mockLogger,
578580
})
@@ -583,4 +585,45 @@ describe("#assertVectorSearchFilterFieldsAreIndexed", () => {
583585
)
584586
);
585587
});
588+
589+
it("should ignore atlas search indexes", () => {
590+
const vectorSearchIndexes: VectorSearchIndex[] = [
591+
...createMockSearchIndexes("index1", ["field1", "field2"]),
592+
// Atlas search index - it should be ignored by the validation
593+
// and not cause any errors
594+
{
595+
name: "atlasSearchIndex",
596+
latestDefinition: {
597+
analyzer: "lucene.standard",
598+
mappings: {
599+
dynamic: false,
600+
},
601+
},
602+
type: "search",
603+
} as unknown as VectorSearchIndex,
604+
];
605+
606+
const pipeline = [
607+
{
608+
$vectorSearch: {
609+
index: "index1",
610+
path: "embedding",
611+
queryVector: [1, 2, 3],
612+
numCandidates: 100,
613+
limit: 10,
614+
filter: {
615+
field1: "value",
616+
},
617+
},
618+
},
619+
];
620+
621+
expect(() =>
622+
assertVectorSearchFilterFieldsAreIndexed({
623+
vectorSearchIndexes,
624+
pipeline,
625+
logger: mockLogger,
626+
})
627+
).not.toThrow();
628+
});
586629
});

0 commit comments

Comments
 (0)