From 198b36e7b26fd73dd87c935e78630001ba55582b Mon Sep 17 00:00:00 2001 From: Himanshu Singh Date: Fri, 7 Nov 2025 13:11:16 +0100 Subject: [PATCH 1/5] chore: add additional tests for metadata validation --- src/tools/mongodb/create/createIndex.ts | 2 +- .../tools/mongodb/create/createIndex.test.ts | 18 +++++++++++++++++- .../tools/mongodb/create/insertMany.test.ts | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/tools/mongodb/create/createIndex.ts b/src/tools/mongodb/create/createIndex.ts index 68ad4d91..fcbc12ee 100644 --- a/src/tools/mongodb/create/createIndex.ts +++ b/src/tools/mongodb/create/createIndex.ts @@ -80,7 +80,7 @@ export class CreateIndexTool extends MongoDBToolBase { ]) ) .describe( - "The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes" + `The index definition. Use 'classic' for standard indexes${this.isFeatureEnabled("vectorSearch") ? " and 'vectorSearch' for vector search indexes" : ""}.` ), }; diff --git a/tests/integration/tools/mongodb/create/createIndex.test.ts b/tests/integration/tools/mongodb/create/createIndex.test.ts index f76bb5ba..a4c4a7be 100644 --- a/tests/integration/tools/mongodb/create/createIndex.test.ts +++ b/tests/integration/tools/mongodb/create/createIndex.test.ts @@ -13,6 +13,22 @@ import { ObjectId, type Collection, type Document, type IndexDirection } from "m import { afterEach, beforeEach, describe, expect, it } from "vitest"; describeWithMongoDB("createIndex tool when search is not enabled", (integration) => { + validateToolMetadata(integration, "create-index", "Create an index for a collection", [ + ...databaseCollectionParameters, + { + name: "definition", + type: "array", + description: "The index definition. Use 'classic' for standard indexes.", + required: true, + }, + { + name: "name", + type: "string", + description: "The name of the index", + required: false, + }, + ]); + it("doesn't allow creating vector search indexes", async () => { expect(integration.mcpServer().userConfig.previewFeatures).to.not.include("vectorSearch"); @@ -99,7 +115,7 @@ describeWithMongoDB( name: "definition", type: "array", description: - "The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes", + "The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes.", required: true, }, { diff --git a/tests/integration/tools/mongodb/create/insertMany.test.ts b/tests/integration/tools/mongodb/create/insertMany.test.ts index e9964e26..ac72a131 100644 --- a/tests/integration/tools/mongodb/create/insertMany.test.ts +++ b/tests/integration/tools/mongodb/create/insertMany.test.ts @@ -124,6 +124,24 @@ describeWithMongoDB( await collection.drop(); }); + validateToolMetadata(integration, "insert-many", "Insert an array of documents into a MongoDB collection", [ + ...databaseCollectionParameters, + { + name: "documents", + type: "array", + description: + "The array of documents to insert, matching the syntax of the document argument of db.collection.insertMany().", + required: true, + }, + { + name: "embeddingParameters", + type: "object", + description: + "The embedding model and its parameters to use to generate embeddings for fields with vector search indexes. Note to LLM: If unsure which embedding model to use, ask the user before providing one.", + required: false, + }, + ]); + it("inserts a document when the embedding is correct", async () => { await createVectorSearchIndexAndWait(integration.mongoClient(), database, "test", [ { From 5cd2e51f0e78ec272b4d2524a2a1e5c8dd5001f3 Mon Sep 17 00:00:00 2001 From: Himanshu Singh Date: Fri, 7 Nov 2025 14:26:57 +0100 Subject: [PATCH 2/5] chore: validate when quantization is 'none' --- .../search/vectorSearchEmbeddingsManager.ts | 28 ++++- .../vectorSearchEmbeddingsManager.test.ts | 114 +++++++++++------- 2 files changed, 98 insertions(+), 44 deletions(-) diff --git a/src/common/search/vectorSearchEmbeddingsManager.ts b/src/common/search/vectorSearchEmbeddingsManager.ts index dfcd4e28..b961b71a 100644 --- a/src/common/search/vectorSearchEmbeddingsManager.ts +++ b/src/common/search/vectorSearchEmbeddingsManager.ts @@ -198,9 +198,33 @@ export class VectorSearchEmbeddingsManager { } switch (definition.quantization) { - // Because quantization is not defined by the user - // we have to trust them in the format they use. + // Quantization "none" means no quantization is performed, so + // full-fidelity vectors are stored therefore the underlying vector + // must be stored as an array of numbers having the same dimension + // as that of the index. case "none": + if (!Array.isArray(fieldRef)) { + return constructError({ + error: "not-a-vector", + }); + } + + if (fieldRef.length !== definition.numDimensions) { + return constructError({ + actualNumDimensions: fieldRef.length, + actualQuantization: "none", + error: "dimension-mismatch", + }); + } + + if (!fieldRef.every((e) => this.isANumber(e))) { + return constructError({ + actualNumDimensions: fieldRef.length, + actualQuantization: "none", + error: "not-numeric", + }); + } + return undefined; case "scalar": case "binary": diff --git a/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts b/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts index 2bf05146..e1283eed 100644 --- a/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts +++ b/tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts @@ -42,6 +42,13 @@ const embeddingConfig: Map = n [ mapKey, [ + { + type: "vector", + path: "embedding_field_wo_quantization", + numDimensions: 8, + quantization: "none", + similarity: "euclidean", + }, { type: "vector", path: "embedding_field", @@ -278,51 +285,74 @@ describe("VectorSearchEmbeddingsManager", () => { expect(result).toHaveLength(0); }); - it("documents inserting the field with wrong type are invalid", async () => { - const result = await embeddings.findFieldsWithWrongEmbeddings( - { database, collection }, - { embedding_field: "some text" } - ); - - expect(result).toHaveLength(1); - }); + it.each(["embedding_field", "embedding_field_wo_quantization"] as const)( + "documents inserting the field with wrong type are invalid - $0", + async (field) => { + const result = await embeddings.findFieldsWithWrongEmbeddings( + { database, collection }, + { [field]: "some text" } + ); - it("documents inserting the field with wrong dimensions are invalid", async () => { - const result = await embeddings.findFieldsWithWrongEmbeddings( - { database, collection }, - { embedding_field: [1, 2, 3] } - ); - - expect(result).toHaveLength(1); - const expectedError: VectorFieldValidationError = { - actualNumDimensions: 3, - actualQuantization: "scalar", - error: "dimension-mismatch", - expectedNumDimensions: 8, - expectedQuantization: "scalar", - path: "embedding_field", - }; - expect(result[0]).toEqual(expectedError); - }); + expect(result).toHaveLength(1); + } + ); - it("documents inserting the field with correct dimensions, but wrong type are invalid", async () => { - const result = await embeddings.findFieldsWithWrongEmbeddings( - { database, collection }, - { embedding_field: ["1", "2", "3", "4", "5", "6", "7", "8"] } - ); + it.each([ + { path: "embedding_field", expectedQuantization: "scalar", actualQuantization: "scalar" }, + { + path: "embedding_field_wo_quantization", + expectedQuantization: "none", + actualQuantization: "none", + }, + ] as const)( + "documents inserting the field with wrong dimensions are invalid - path = $path", + async ({ path, expectedQuantization, actualQuantization }) => { + const result = await embeddings.findFieldsWithWrongEmbeddings( + { database, collection }, + { [path]: [1, 2, 3] } + ); + + expect(result).toHaveLength(1); + const expectedError: VectorFieldValidationError = { + actualNumDimensions: 3, + actualQuantization, + error: "dimension-mismatch", + expectedNumDimensions: 8, + expectedQuantization, + path, + }; + expect(result[0]).toEqual(expectedError); + } + ); - expect(result).toHaveLength(1); - const expectedError: VectorFieldValidationError = { - actualNumDimensions: 8, - actualQuantization: "scalar", - error: "not-numeric", - expectedNumDimensions: 8, - expectedQuantization: "scalar", - path: "embedding_field", - }; - - expect(result[0]).toEqual(expectedError); - }); + it.each([ + { path: "embedding_field", expectedQuantization: "scalar", actualQuantization: "scalar" }, + { + path: "embedding_field_wo_quantization", + expectedQuantization: "none", + actualQuantization: "none", + }, + ] as const)( + "documents inserting the field with correct dimensions, but wrong type are invalid - $path", + async ({ path, expectedQuantization, actualQuantization }) => { + const result = await embeddings.findFieldsWithWrongEmbeddings( + { database, collection }, + { [path]: ["1", "2", "3", "4", "5", "6", "7", "8"] } + ); + + expect(result).toHaveLength(1); + const expectedError: VectorFieldValidationError = { + actualNumDimensions: 8, + actualQuantization, + error: "not-numeric", + expectedNumDimensions: 8, + expectedQuantization, + path, + }; + + expect(result[0]).toEqual(expectedError); + } + ); it("documents inserting the field with correct dimensions and quantization in binary are valid", async () => { const result = await embeddings.findFieldsWithWrongEmbeddings( From fbb3d0f35f4b7ca8e04210b84103acef37256850 Mon Sep 17 00:00:00 2001 From: Himanshu Singh Date: Fri, 7 Nov 2025 15:23:42 +0100 Subject: [PATCH 3/5] chore: add warning when vector search is not correctly configured --- src/common/config.ts | 27 +++++++++++++++++++++++---- tests/unit/common/config.test.ts | 20 ++++++++++++-------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/common/config.ts b/src/common/config.ts index e7ece022..eb9adc11 100644 --- a/src/common/config.ts +++ b/src/common/config.ts @@ -319,7 +319,7 @@ export function warnAboutDeprecatedOrUnknownCliArgs( if (knownArgs.connectionString) { usedDeprecatedArgument = true; warn( - "The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string." + "Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string." ); } @@ -333,15 +333,15 @@ export function warnAboutDeprecatedOrUnknownCliArgs( if (!valid) { usedInvalidArgument = true; if (suggestion) { - warn(`Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`); + warn(`Warning: Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`); } else { - warn(`Invalid command line argument '${providedKey}'.`); + warn(`Warning: Invalid command line argument '${providedKey}'.`); } } } if (usedInvalidArgument || usedDeprecatedArgument) { - warn("Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."); + warn("- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."); } if (usedInvalidArgument) { @@ -372,6 +372,24 @@ export function registerKnownSecretsInRootKeychain(userConfig: Partial }): UserConfig { const rawConfig = { ...parseEnvConfig(env), @@ -392,6 +410,7 @@ export function setupUserConfig({ cli, env }: { cli: string[]; env: Record { describe("CLI arguments", () => { const referDocMessage = - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."; + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server."; type TestCase = { readonly cliArg: keyof (CliOptions & UserConfig); readonly warning: string }; const testCases = [ { cliArg: "connectionString", warning: - "The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string.", + "Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string.", }, ] as TestCase[]; @@ -742,9 +742,9 @@ describe("CLI arguments", () => { { warn, exit } ); - expect(warn).toHaveBeenCalledWith("Invalid command line argument 'wakanda'."); + expect(warn).toHaveBeenCalledWith("Warning: Invalid command line argument 'wakanda'."); expect(warn).toHaveBeenCalledWith( - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." ); }); @@ -767,9 +767,11 @@ describe("CLI arguments", () => { { warn, exit } ); - expect(warn).toHaveBeenCalledWith("Invalid command line argument 'readonli'. Did you mean 'readOnly'?"); expect(warn).toHaveBeenCalledWith( - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." + "Warning: Invalid command line argument 'readonli'. Did you mean 'readOnly'?" + ); + expect(warn).toHaveBeenCalledWith( + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." ); }); @@ -781,9 +783,11 @@ describe("CLI arguments", () => { { warn, exit } ); - expect(warn).toHaveBeenCalledWith("Invalid command line argument 'readonly'. Did you mean 'readOnly'?"); expect(warn).toHaveBeenCalledWith( - "Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." + "Warning: Invalid command line argument 'readonly'. Did you mean 'readOnly'?" + ); + expect(warn).toHaveBeenCalledWith( + "- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server." ); }); }); From 6c3142b5fed0585b88a2d714eed1fa5be0c903fd Mon Sep 17 00:00:00 2001 From: Himanshu Singh Date: Fri, 7 Nov 2025 15:33:09 +0100 Subject: [PATCH 4/5] chore: address copilot warnings --- src/common/config.ts | 2 +- src/common/search/vectorSearchEmbeddingsManager.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/config.ts b/src/common/config.ts index eb9adc11..85c52085 100644 --- a/src/common/config.ts +++ b/src/common/config.ts @@ -378,7 +378,7 @@ function warnIfVectorSearchNotEnabledCorrectly(config: UserConfig): void { if (vectorSearchEnabled && !embeddingsProviderConfigured) { console.warn(`\ Warning: Vector search is enabled but no embeddings provider is configured. -- Set the 'voyageApiKey' configuration option to enable auto-embeddings during document insertion and text-based queries with $vectorSearch.\ +- Set an embeddings provider configuration option to enable auto-embeddings during document insertion and text-based queries with $vectorSearch.\ `); } diff --git a/src/common/search/vectorSearchEmbeddingsManager.ts b/src/common/search/vectorSearchEmbeddingsManager.ts index b961b71a..a92ca932 100644 --- a/src/common/search/vectorSearchEmbeddingsManager.ts +++ b/src/common/search/vectorSearchEmbeddingsManager.ts @@ -217,7 +217,7 @@ export class VectorSearchEmbeddingsManager { }); } - if (!fieldRef.every((e) => this.isANumber(e))) { + if (fieldRef.some((e) => !this.isANumber(e))) { return constructError({ actualNumDimensions: fieldRef.length, actualQuantization: "none", @@ -275,7 +275,7 @@ export class VectorSearchEmbeddingsManager { }); } - if (!fieldRef.every((e) => this.isANumber(e))) { + if (fieldRef.some((e) => !this.isANumber(e))) { return constructError({ actualNumDimensions: fieldRef.length, actualQuantization: "scalar", From d9bd87037eb2683660cf5f72a9bf55f722ee9837 Mon Sep 17 00:00:00 2001 From: Himanshu Singh Date: Mon, 10 Nov 2025 10:27:27 +0100 Subject: [PATCH 5/5] chore: feature flag the aggregate schema --- src/tools/mongodb/metadata/explain.ts | 4 +-- src/tools/mongodb/mongodbSchemas.ts | 2 +- src/tools/mongodb/read/aggregate.ts | 25 +++++++++++-------- src/tools/mongodb/read/export.ts | 6 +++-- .../tools/mongodb/read/aggregate.test.ts | 4 +-- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/tools/mongodb/metadata/explain.ts b/src/tools/mongodb/metadata/explain.ts index d1f7c686..a98d4f6e 100644 --- a/src/tools/mongodb/metadata/explain.ts +++ b/src/tools/mongodb/metadata/explain.ts @@ -4,7 +4,7 @@ import type { ToolArgs, OperationType } from "../../tool.js"; import { formatUntrustedData } from "../../tool.js"; import { z } from "zod"; import type { Document } from "mongodb"; -import { AggregateArgs } from "../read/aggregate.js"; +import { getAggregateArgs } from "../read/aggregate.js"; import { FindArgs } from "../read/find.js"; import { CountArgs } from "../read/count.js"; @@ -20,7 +20,7 @@ export class ExplainTool extends MongoDBToolBase { z.discriminatedUnion("name", [ z.object({ name: z.literal("aggregate"), - arguments: z.object(AggregateArgs), + arguments: z.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch"))), }), z.object({ name: z.literal("find"), diff --git a/src/tools/mongodb/mongodbSchemas.ts b/src/tools/mongodb/mongodbSchemas.ts index cfae16b2..392273ba 100644 --- a/src/tools/mongodb/mongodbSchemas.ts +++ b/src/tools/mongodb/mongodbSchemas.ts @@ -42,7 +42,7 @@ export type EmbeddingParameters = { export const zSupportedEmbeddingParameters = zVoyageEmbeddingParameters.extend({ model: zVoyageModels }); export type SupportedEmbeddingParameters = z.infer; -export const AnyVectorSearchStage = zEJSON(); +export const AnyAggregateStage = zEJSON(); export const VectorSearchStage = z.object({ $vectorSearch: z .object({ diff --git a/src/tools/mongodb/read/aggregate.ts b/src/tools/mongodb/read/aggregate.ts index d6a624cf..ae28dba1 100644 --- a/src/tools/mongodb/read/aggregate.ts +++ b/src/tools/mongodb/read/aggregate.ts @@ -12,18 +12,17 @@ import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorU import { operationWithFallback } from "../../../helpers/operationWithFallback.js"; import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js"; import { LogId } from "../../../common/logger.js"; -import { AnyVectorSearchStage, VectorSearchStage } from "../mongodbSchemas.js"; +import { AnyAggregateStage, VectorSearchStage } from "../mongodbSchemas.js"; import { assertVectorSearchFilterFieldsAreIndexed, type VectorSearchIndex, } from "../../../helpers/assertVectorSearchFilterFieldsAreIndexed.js"; -export const AggregateArgs = { - pipeline: z.array(z.union([AnyVectorSearchStage, VectorSearchStage])).describe( - `An array of aggregation stages to execute. +const pipelineDescription = `\ +An array of aggregation stages to execute. \`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline. ### Usage Rules for \`$vectorSearch\` -- **Unset embeddings:** +- **Unset embeddings:** Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**. - **Pre-filtering:** If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index. @@ -32,20 +31,26 @@ If the user requests additional filtering, include filters in \`$vectorSearch.fi For all remaining filters, add a $match stage after $vectorSearch. ### Note to LLM - If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields. -- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.` - ), - responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ +- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.\ +`; + +export const getAggregateArgs = (vectorSearchEnabled: boolean) => + ({ + pipeline: z + .array(vectorSearchEnabled ? z.union([AnyAggregateStage, VectorSearchStage]) : AnyAggregateStage) + .describe(pipelineDescription), + responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \ Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\ `), -}; + }) as const; export class AggregateTool extends MongoDBToolBase { public name = "aggregate"; protected description = "Run an aggregation against a MongoDB collection"; protected argsShape = { ...DbOperationArgs, - ...AggregateArgs, + ...getAggregateArgs(this.isFeatureEnabled("vectorSearch")), }; public operationType: OperationType = "read"; diff --git a/src/tools/mongodb/read/export.ts b/src/tools/mongodb/read/export.ts index e2ac194b..a12ed9fb 100644 --- a/src/tools/mongodb/read/export.ts +++ b/src/tools/mongodb/read/export.ts @@ -6,7 +6,7 @@ import type { OperationType, ToolArgs } from "../../tool.js"; import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; import { FindArgs } from "./find.js"; import { jsonExportFormat } from "../../../common/exportsManager.js"; -import { AggregateArgs } from "./aggregate.js"; +import { getAggregateArgs } from "./aggregate.js"; export class ExportTool extends MongoDBToolBase { public name = "export"; @@ -32,7 +32,9 @@ export class ExportTool extends MongoDBToolBase { name: z .literal("aggregate") .describe("The literal name 'aggregate' to represent an aggregation cursor as target."), - arguments: z.object(AggregateArgs).describe("The arguments for 'aggregate' operation."), + arguments: z + .object(getAggregateArgs(this.isFeatureEnabled("vectorSearch"))) + .describe("The arguments for 'aggregate' operation."), }), ]) ) diff --git a/tests/integration/tools/mongodb/read/aggregate.test.ts b/tests/integration/tools/mongodb/read/aggregate.test.ts index 11b29e4e..1616758e 100644 --- a/tests/integration/tools/mongodb/read/aggregate.test.ts +++ b/tests/integration/tools/mongodb/read/aggregate.test.ts @@ -27,10 +27,10 @@ describeWithMongoDB("aggregate tool", (integration) => { ...databaseCollectionParameters, { name: "pipeline", - description: `An array of aggregation stages to execute. + description: `An array of aggregation stages to execute. \`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline. ### Usage Rules for \`$vectorSearch\` -- **Unset embeddings:** +- **Unset embeddings:** Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**. - **Pre-filtering:** If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index.