Skip to content

Commit 2a499f4

Browse files
chore: adds field embeddings validation for quantization "none" and warns when vectorSearch is not configured correctly (#717)
1 parent 2764d04 commit 2a499f4

File tree

13 files changed

+291
-171
lines changed

13 files changed

+291
-171
lines changed

src/common/config.ts

Lines changed: 23 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ export function warnAboutDeprecatedOrUnknownCliArgs(
319319
if (knownArgs.connectionString) {
320320
usedDeprecatedArgument = true;
321321
warn(
322-
"The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string."
322+
"Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string."
323323
);
324324
}
325325

@@ -333,15 +333,15 @@ export function warnAboutDeprecatedOrUnknownCliArgs(
333333
if (!valid) {
334334
usedInvalidArgument = true;
335335
if (suggestion) {
336-
warn(`Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`);
336+
warn(`Warning: Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`);
337337
} else {
338-
warn(`Invalid command line argument '${providedKey}'.`);
338+
warn(`Warning: Invalid command line argument '${providedKey}'.`);
339339
}
340340
}
341341
}
342342

343343
if (usedInvalidArgument || usedDeprecatedArgument) {
344-
warn("Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.");
344+
warn("- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.");
345345
}
346346

347347
if (usedInvalidArgument) {
@@ -372,6 +372,24 @@ export function registerKnownSecretsInRootKeychain(userConfig: Partial<UserConfi
372372
maybeRegister(userConfig.username, "user");
373373
}
374374

375+
export function warnIfVectorSearchNotEnabledCorrectly(config: UserConfig, warn: (message: string) => void): void {
376+
const vectorSearchEnabled = config.previewFeatures.includes("vectorSearch");
377+
const embeddingsProviderConfigured = !!config.voyageApiKey;
378+
if (vectorSearchEnabled && !embeddingsProviderConfigured) {
379+
warn(`\
380+
Warning: Vector search is enabled but no embeddings provider is configured.
381+
- Set an embeddings provider configuration option to enable auto-embeddings during document insertion and text-based queries with $vectorSearch.\
382+
`);
383+
}
384+
385+
if (!vectorSearchEnabled && embeddingsProviderConfigured) {
386+
warn(`\
387+
Warning: An embeddings provider is configured but the 'vectorSearch' preview feature is not enabled.
388+
- Enable vector search by adding 'vectorSearch' to the 'previewFeatures' configuration option, or remove the embeddings provider configuration if not needed.\
389+
`);
390+
}
391+
}
392+
375393
export function setupUserConfig({ cli, env }: { cli: string[]; env: Record<string, unknown> }): UserConfig {
376394
const rawConfig = {
377395
...parseEnvConfig(env),
@@ -392,6 +410,7 @@ export function setupUserConfig({ cli, env }: { cli: string[]; env: Record<strin
392410
// We don't have a schema defined for all args-parser arguments so we need to merge the raw config with the parsed config.
393411
const userConfig = { ...rawConfig, ...parseResult.data } as UserConfig;
394412

413+
warnIfVectorSearchNotEnabledCorrectly(userConfig, (message) => console.warn(message));
395414
registerKnownSecretsInRootKeychain(userConfig);
396415
return userConfig;
397416
}

src/common/search/vectorSearchEmbeddingsManager.ts

Lines changed: 45 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,8 @@ export type VectorFieldIndexDefinition = {
2424
export type VectorFieldValidationError = {
2525
path: string;
2626
expectedNumDimensions: number;
27-
expectedQuantization: Quantization;
2827
actualNumDimensions: number | "unknown";
29-
actualQuantization: Quantization | "unknown";
30-
error: "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric";
28+
error: "dimension-mismatch" | "not-a-vector" | "not-numeric";
3129
};
3230

3331
export type EmbeddingNamespace = `${string}.${string}`;
@@ -116,9 +114,9 @@ export class VectorSearchEmbeddingsManager {
116114
if (embeddingValidationResults.length > 0) {
117115
const embeddingValidationMessages = embeddingValidationResults.map(
118116
(validation) =>
119-
`- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions and ${validation.expectedQuantization}` +
120-
` quantization, and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions}, ` +
121-
`actual quantization: ${validation.actualQuantization}. Error: ${validation.error}`
117+
`- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions,` +
118+
` and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions},` +
119+
` Error: ${validation.error}`
122120
);
123121

124122
throw new MongoDBError(
@@ -179,16 +177,36 @@ export class VectorSearchEmbeddingsManager {
179177
let fieldRef: unknown = document;
180178

181179
const constructError = (
182-
details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions" | "actualQuantization">>
180+
details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions">>
183181
): VectorFieldValidationError => ({
184182
path: definition.path,
185183
expectedNumDimensions: definition.numDimensions,
186-
expectedQuantization: definition.quantization,
187184
actualNumDimensions: details.actualNumDimensions ?? "unknown",
188-
actualQuantization: details.actualQuantization ?? "unknown",
189185
error: details.error ?? "not-a-vector",
190186
});
191187

188+
const extractUnderlyingVector = (fieldRef: unknown): ArrayLike<unknown> | undefined => {
189+
if (fieldRef instanceof BSON.Binary) {
190+
try {
191+
return fieldRef.toFloat32Array();
192+
} catch {
193+
// nothing to do here
194+
}
195+
196+
try {
197+
return fieldRef.toBits();
198+
} catch {
199+
// nothing to do here
200+
}
201+
}
202+
203+
if (Array.isArray(fieldRef)) {
204+
return fieldRef as Array<unknown>;
205+
}
206+
207+
return undefined;
208+
};
209+
192210
for (const field of fieldPath) {
193211
if (fieldRef && typeof fieldRef === "object" && field in fieldRef) {
194212
fieldRef = (fieldRef as Record<string, unknown>)[field];
@@ -197,70 +215,25 @@ export class VectorSearchEmbeddingsManager {
197215
}
198216
}
199217

200-
switch (definition.quantization) {
201-
// Because quantization is not defined by the user
202-
// we have to trust them in the format they use.
203-
case "none":
204-
return undefined;
205-
case "scalar":
206-
case "binary":
207-
if (fieldRef instanceof BSON.Binary) {
208-
try {
209-
const elements = fieldRef.toFloat32Array();
210-
if (elements.length !== definition.numDimensions) {
211-
return constructError({
212-
actualNumDimensions: elements.length,
213-
actualQuantization: "binary",
214-
error: "dimension-mismatch",
215-
});
216-
}
217-
218-
return undefined;
219-
} catch {
220-
// bits are also supported
221-
try {
222-
const bits = fieldRef.toBits();
223-
if (bits.length !== definition.numDimensions) {
224-
return constructError({
225-
actualNumDimensions: bits.length,
226-
actualQuantization: "binary",
227-
error: "dimension-mismatch",
228-
});
229-
}
230-
231-
return undefined;
232-
} catch {
233-
return constructError({
234-
actualQuantization: "binary",
235-
error: "not-a-vector",
236-
});
237-
}
238-
}
239-
} else {
240-
if (!Array.isArray(fieldRef)) {
241-
return constructError({
242-
error: "not-a-vector",
243-
});
244-
}
245-
246-
if (fieldRef.length !== definition.numDimensions) {
247-
return constructError({
248-
actualNumDimensions: fieldRef.length,
249-
actualQuantization: "scalar",
250-
error: "dimension-mismatch",
251-
});
252-
}
253-
254-
if (!fieldRef.every((e) => this.isANumber(e))) {
255-
return constructError({
256-
actualNumDimensions: fieldRef.length,
257-
actualQuantization: "scalar",
258-
error: "not-numeric",
259-
});
260-
}
261-
}
218+
const maybeVector = extractUnderlyingVector(fieldRef);
219+
if (!maybeVector) {
220+
return constructError({
221+
error: "not-a-vector",
222+
});
223+
}
262224

263-
break;
225+
if (maybeVector.length !== definition.numDimensions) {
226+
return constructError({
227+
actualNumDimensions: maybeVector.length,
228+
error: "dimension-mismatch",
229+
});
230+
}
231+
232+
if (Array.isArray(maybeVector) && maybeVector.some((e) => !this.isANumber(e))) {
233+
return constructError({
234+
actualNumDimensions: maybeVector.length,
235+
error: "not-numeric",
236+
});
264237
}
265238

266239
return undefined;

src/tools/mongodb/create/createIndex.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ export class CreateIndexTool extends MongoDBToolBase {
8080
])
8181
)
8282
.describe(
83-
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes"
83+
`The index definition. Use 'classic' for standard indexes${this.isFeatureEnabled("vectorSearch") ? " and 'vectorSearch' for vector search indexes" : ""}.`
8484
),
8585
};
8686

src/tools/mongodb/metadata/explain.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ import type { ToolArgs, OperationType } from "../../tool.js";
44
import { formatUntrustedData } from "../../tool.js";
55
import { z } from "zod";
66
import type { Document } from "mongodb";
7-
import { AggregateArgs } from "../read/aggregate.js";
7+
import { getAggregateArgs } from "../read/aggregate.js";
88
import { FindArgs } from "../read/find.js";
99
import { CountArgs } from "../read/count.js";
1010

@@ -20,7 +20,7 @@ export class ExplainTool extends MongoDBToolBase {
2020
z.discriminatedUnion("name", [
2121
z.object({
2222
name: z.literal("aggregate"),
23-
arguments: z.object(AggregateArgs),
23+
arguments: z.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch"))),
2424
}),
2525
z.object({
2626
name: z.literal("find"),

src/tools/mongodb/mongodbSchemas.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ export type EmbeddingParameters = {
4242
export const zSupportedEmbeddingParameters = zVoyageEmbeddingParameters.extend({ model: zVoyageModels });
4343
export type SupportedEmbeddingParameters = z.infer<typeof zSupportedEmbeddingParameters>;
4444

45-
export const AnyVectorSearchStage = zEJSON();
45+
export const AnyAggregateStage = zEJSON();
4646
export const VectorSearchStage = z.object({
4747
$vectorSearch: z
4848
.object({

src/tools/mongodb/read/aggregate.ts

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -12,18 +12,17 @@ import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorU
1212
import { operationWithFallback } from "../../../helpers/operationWithFallback.js";
1313
import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js";
1414
import { LogId } from "../../../common/logger.js";
15-
import { AnyVectorSearchStage, VectorSearchStage } from "../mongodbSchemas.js";
15+
import { AnyAggregateStage, VectorSearchStage } from "../mongodbSchemas.js";
1616
import {
1717
assertVectorSearchFilterFieldsAreIndexed,
1818
type VectorSearchIndex,
1919
} from "../../../helpers/assertVectorSearchFilterFieldsAreIndexed.js";
2020

21-
export const AggregateArgs = {
22-
pipeline: z.array(z.union([AnyVectorSearchStage, VectorSearchStage])).describe(
23-
`An array of aggregation stages to execute.
21+
const pipelineDescriptionWithVectorSearch = `\
22+
An array of aggregation stages to execute.
2423
\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
2524
### Usage Rules for \`$vectorSearch\`
26-
- **Unset embeddings:**
25+
- **Unset embeddings:**
2726
Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**.
2827
- **Pre-filtering:**
2928
If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index.
@@ -32,20 +31,28 @@ If the user requests additional filtering, include filters in \`$vectorSearch.fi
3231
For all remaining filters, add a $match stage after $vectorSearch.
3332
### Note to LLM
3433
- If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields.
35-
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.`
36-
),
37-
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
34+
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.\
35+
`;
36+
37+
const genericPipelineDescription = "An array of aggregation stages to execute.";
38+
39+
export const getAggregateArgs = (vectorSearchEnabled: boolean) =>
40+
({
41+
pipeline: z
42+
.array(vectorSearchEnabled ? z.union([AnyAggregateStage, VectorSearchStage]) : AnyAggregateStage)
43+
.describe(vectorSearchEnabled ? pipelineDescriptionWithVectorSearch : genericPipelineDescription),
44+
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
3845
The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \
3946
Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\
4047
`),
41-
};
48+
}) as const;
4249

4350
export class AggregateTool extends MongoDBToolBase {
4451
public name = "aggregate";
4552
protected description = "Run an aggregation against a MongoDB collection";
4653
protected argsShape = {
4754
...DbOperationArgs,
48-
...AggregateArgs,
55+
...getAggregateArgs(this.isFeatureEnabled("vectorSearch")),
4956
};
5057
public operationType: OperationType = "read";
5158

src/tools/mongodb/read/export.ts

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ import type { OperationType, ToolArgs } from "../../tool.js";
66
import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js";
77
import { FindArgs } from "./find.js";
88
import { jsonExportFormat } from "../../../common/exportsManager.js";
9-
import { AggregateArgs } from "./aggregate.js";
9+
import { getAggregateArgs } from "./aggregate.js";
1010

1111
export class ExportTool extends MongoDBToolBase {
1212
public name = "export";
@@ -32,7 +32,9 @@ export class ExportTool extends MongoDBToolBase {
3232
name: z
3333
.literal("aggregate")
3434
.describe("The literal name 'aggregate' to represent an aggregation cursor as target."),
35-
arguments: z.object(AggregateArgs).describe("The arguments for 'aggregate' operation."),
35+
arguments: z
36+
.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch")))
37+
.describe("The arguments for 'aggregate' operation."),
3638
}),
3739
])
3840
)

tests/integration/tools/mongodb/create/createIndex.test.ts

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,22 @@ import { ObjectId, type Collection, type Document, type IndexDirection } from "m
1313
import { afterEach, beforeEach, describe, expect, it } from "vitest";
1414

1515
describeWithMongoDB("createIndex tool when search is not enabled", (integration) => {
16+
validateToolMetadata(integration, "create-index", "Create an index for a collection", [
17+
...databaseCollectionParameters,
18+
{
19+
name: "definition",
20+
type: "array",
21+
description: "The index definition. Use 'classic' for standard indexes.",
22+
required: true,
23+
},
24+
{
25+
name: "name",
26+
type: "string",
27+
description: "The name of the index",
28+
required: false,
29+
},
30+
]);
31+
1632
it("doesn't allow creating vector search indexes", async () => {
1733
expect(integration.mcpServer().userConfig.previewFeatures).to.not.include("vectorSearch");
1834

@@ -99,7 +115,7 @@ describeWithMongoDB(
99115
name: "definition",
100116
type: "array",
101117
description:
102-
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes",
118+
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes.",
103119
required: true,
104120
},
105121
{

0 commit comments

Comments
 (0)