Skip to content

Commit 33f8a05

Browse files
authored
feat: add ability to create atlas search indexes MCP-275 (#692)
1 parent 2415734 commit 33f8a05

File tree

11 files changed

+691
-108
lines changed

11 files changed

+691
-108
lines changed

.vscode/launch.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
55
"version": "0.2.0",
66
"configurations": [
7+
{
8+
"name": "Attach by Process ID",
9+
"processId": "${command:PickProcess}",
10+
"request": "attach",
11+
"skipFiles": ["<node_internals>/**"],
12+
"type": "node"
13+
},
714
{
815
"type": "node",
916
"request": "launch",

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,6 @@ npx -y mongodb-mcp-server@latest --logPath=/path/to/logs --readOnly --indexCheck
669669
"args": [
670670
"-y",
671671
"mongodb-mcp-server",
672-
"--connectionString",
673672
"mongodb+srv://username:password@cluster.mongodb.net/myDatabase",
674673
"--readOnly"
675674
]

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"dist"
4545
],
4646
"scripts": {
47-
"start": "node dist/index.js --transport http --loggers stderr mcp",
47+
"start": "node dist/index.js --transport http --loggers stderr mcp --previewFeatures vectorSearch",
4848
"start:stdio": "node dist/index.js --transport stdio --loggers stderr mcp",
4949
"prepare": "husky && pnpm run build",
5050
"build:clean": "rm -rf dist",

src/tools/args.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export const ALLOWED_CLUSTER_NAME_CHARACTERS_ERROR =
1818
const ALLOWED_PROJECT_NAME_CHARACTERS_REGEX = /^[a-zA-Z0-9\s()@&+:._',-]+$/;
1919
export const ALLOWED_PROJECT_NAME_CHARACTERS_ERROR =
2020
"Project names can't be longer than 64 characters and can only contain letters, numbers, spaces, and the following symbols: ( ) @ & + : . _ - ' ,";
21+
2122
export const CommonArgs = {
2223
string: (): ZodString => z.string().regex(NO_UNICODE_REGEX, NO_UNICODE_ERROR),
2324

src/tools/mongodb/create/createIndex.ts

Lines changed: 152 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -7,62 +7,129 @@ import { quantizationEnum } from "../../../common/search/vectorSearchEmbeddingsM
77
import { similarityValues } from "../../../common/schemas.js";
88

99
export class CreateIndexTool extends MongoDBToolBase {
10-
private vectorSearchIndexDefinition = z.object({
11-
type: z.literal("vectorSearch"),
12-
fields: z
13-
.array(
14-
z.discriminatedUnion("type", [
15-
z
16-
.object({
17-
type: z.literal("filter"),
18-
path: z
19-
.string()
20-
.describe(
21-
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
22-
),
23-
})
24-
.strict()
25-
.describe("Definition for a field that will be used for pre-filtering results."),
26-
z
27-
.object({
28-
type: z.literal("vector"),
29-
path: z
30-
.string()
31-
.describe(
32-
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
33-
),
34-
numDimensions: z
35-
.number()
36-
.min(1)
37-
.max(8192)
38-
.default(this.config.vectorSearchDimensions)
39-
.describe(
40-
"Number of vector dimensions that MongoDB Vector Search enforces at index-time and query-time"
41-
),
42-
similarity: z
43-
.enum(similarityValues)
44-
.default(this.config.vectorSearchSimilarityFunction)
45-
.describe(
46-
"Vector similarity function to use to search for top K-nearest neighbors. You can set this field only for vector-type fields."
47-
),
48-
quantization: quantizationEnum
49-
.default("none")
10+
private vectorSearchIndexDefinition = z
11+
.object({
12+
type: z.literal("vectorSearch"),
13+
fields: z
14+
.array(
15+
z.discriminatedUnion("type", [
16+
z
17+
.object({
18+
type: z.literal("filter"),
19+
path: z
20+
.string()
21+
.describe(
22+
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
23+
),
24+
})
25+
.strict()
26+
.describe("Definition for a field that will be used for pre-filtering results."),
27+
z
28+
.object({
29+
type: z.literal("vector"),
30+
path: z
31+
.string()
32+
.describe(
33+
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
34+
),
35+
numDimensions: z
36+
.number()
37+
.min(1)
38+
.max(8192)
39+
.default(this.config.vectorSearchDimensions)
40+
.describe(
41+
"Number of vector dimensions that MongoDB Vector Search enforces at index-time and query-time"
42+
),
43+
similarity: z
44+
.enum(similarityValues)
45+
.default(this.config.vectorSearchSimilarityFunction)
46+
.describe(
47+
"Vector similarity function to use to search for top K-nearest neighbors. You can set this field only for vector-type fields."
48+
),
49+
quantization: quantizationEnum
50+
.default("none")
51+
.describe(
52+
"Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float or double vectors."
53+
),
54+
})
55+
.strict()
56+
.describe("Definition for a field that contains vector embeddings."),
57+
])
58+
)
59+
.nonempty()
60+
.refine((fields) => fields.some((f) => f.type === "vector"), {
61+
message: "At least one vector field must be defined",
62+
})
63+
.describe(
64+
"Definitions for the vector and filter fields to index, one definition per document. You must specify `vector` for fields that contain vector embeddings and `filter` for additional fields to filter on. At least one vector-type field definition is required."
65+
),
66+
})
67+
.describe("Definition for a Vector Search index.");
68+
69+
private atlasSearchIndexDefinition = z
70+
.object({
71+
type: z.literal("search"),
72+
analyzer: z
73+
.string()
74+
.optional()
75+
.default("lucene.standard")
76+
.describe(
77+
"The analyzer to use for the index. Can be one of the built-in lucene analyzers (`lucene.standard`, `lucene.simple`, `lucene.whitespace`, `lucene.keyword`), a language-specific analyzer, such as `lucene.cjk` or `lucene.czech`, or a custom analyzer defined in the Atlas UI."
78+
),
79+
mappings: z
80+
.object({
81+
dynamic: z
82+
.boolean()
83+
.optional()
84+
.default(false)
85+
.describe(
86+
"Enables or disables dynamic mapping of fields for this index. If set to true, Atlas Search recursively indexes all dynamically indexable fields. If set to false, you must specify individual fields to index using mappings.fields."
87+
),
88+
fields: z
89+
.record(
90+
z.string().describe("The field name"),
91+
z
92+
.object({
93+
type: z
94+
.enum([
95+
"autocomplete",
96+
"boolean",
97+
"date",
98+
"document",
99+
"embeddedDocuments",
100+
"geo",
101+
"number",
102+
"objectId",
103+
"string",
104+
"token",
105+
"uuid",
106+
])
107+
.describe("The field type"),
108+
})
109+
.passthrough()
50110
.describe(
51-
"Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float or double vectors."
52-
),
53-
})
54-
.strict()
55-
.describe("Definition for a field that contains vector embeddings."),
56-
])
57-
)
58-
.nonempty()
59-
.refine((fields) => fields.some((f) => f.type === "vector"), {
60-
message: "At least one vector field must be defined",
61-
})
62-
.describe(
63-
"Definitions for the vector and filter fields to index, one definition per document. You must specify `vector` for fields that contain vector embeddings and `filter` for additional fields to filter on. At least one vector-type field definition is required."
64-
),
65-
});
111+
"The field index definition. It must contain the field type, as well as any additional options for that field type."
112+
)
113+
)
114+
.optional()
115+
.describe("The field mapping definitions. If `dynamic` is set to `false`, this is required."),
116+
})
117+
.refine((data) => data.dynamic !== !!(data.fields && Object.keys(data.fields).length > 0), {
118+
message:
119+
"Either `dynamic` must be `true` and `fields` empty or `dynamic` must be `false` and at least one field must be defined in `fields`",
120+
})
121+
.describe(
122+
"Document describing the index to create. Either `dynamic` must be `true` and `fields` empty or `dynamic` must be `false` and at least one field must be defined in the `fields` document."
123+
),
124+
numPartitions: z
125+
.union([z.literal("1"), z.literal("2"), z.literal("4")])
126+
.default("1")
127+
.transform((value): number => Number.parseInt(value))
128+
.describe(
129+
"Specifies the number of sub-indexes to create if the document count exceeds two billion. If omitted, defaults to 1."
130+
),
131+
})
132+
.describe("Definition for an Atlas Search (lexical) index.");
66133

67134
public name = "create-index";
68135
protected description = "Create an index for a collection";
@@ -72,15 +139,19 @@ export class CreateIndexTool extends MongoDBToolBase {
72139
definition: z
73140
.array(
74141
z.discriminatedUnion("type", [
75-
z.object({
76-
type: z.literal("classic"),
77-
keys: z.object({}).catchall(z.custom<IndexDirection>()).describe("The index definition"),
78-
}),
79-
...(this.isFeatureEnabled("search") ? [this.vectorSearchIndexDefinition] : []),
142+
z
143+
.object({
144+
type: z.literal("classic"),
145+
keys: z.object({}).catchall(z.custom<IndexDirection>()).describe("The index definition"),
146+
})
147+
.describe("Definition for a MongoDB index (e.g. ascending/descending/geospatial)."),
148+
...(this.isFeatureEnabled("search")
149+
? [this.vectorSearchIndexDefinition, this.atlasSearchIndexDefinition]
150+
: []),
80151
])
81152
)
82153
.describe(
83-
`The index definition. Use 'classic' for standard indexes${this.isFeatureEnabled("search") ? " and 'vectorSearch' for vector search indexes" : ""}.`
154+
`The index definition. Use 'classic' for standard indexes${this.isFeatureEnabled("search") ? ", 'vectorSearch' for vector search indexes, and 'search' for Atlas Search (lexical) indexes" : ""}.`
84155
),
85156
};
86157

@@ -130,6 +201,26 @@ export class CreateIndexTool extends MongoDBToolBase {
130201
this.session.vectorSearchEmbeddingsManager.cleanupEmbeddingsForNamespace({ database, collection });
131202
}
132203

204+
break;
205+
case "search":
206+
{
207+
await this.ensureSearchIsSupported();
208+
indexes = await provider.createSearchIndexes(database, collection, [
209+
{
210+
name,
211+
definition: {
212+
mappings: definition.mappings,
213+
analyzer: definition.analyzer,
214+
numPartitions: definition.numPartitions,
215+
},
216+
type: "search",
217+
},
218+
]);
219+
220+
responseClarification =
221+
" Since this is a search index, it may take a while for the index to build. Use the `collection-indexes` tool to check the index status.";
222+
}
223+
133224
break;
134225
}
135226

src/tools/mongodb/read/aggregate.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ import {
2020

2121
const pipelineDescriptionWithVectorSearch = `\
2222
An array of aggregation stages to execute.
23-
\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
23+
If the user has asked for a vector search, \`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
24+
If the user has asked for lexical/Atlas search, use \`$search\` instead of \`$text\`.
2425
### Usage Rules for \`$vectorSearch\`
2526
- **Unset embeddings:**
2627
Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**.
@@ -29,9 +30,12 @@ If the user requests additional filtering, include filters in \`$vectorSearch.fi
2930
NEVER include fields in $vectorSearch.filter that are not part of the vector index.
3031
- **Post-filtering:**
3132
For all remaining filters, add a $match stage after $vectorSearch.
32-
### Note to LLM
3333
- If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields.
34-
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.\
34+
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.
35+
36+
### Usage Rules for \`$search\`
37+
- Include the index name, unless you know for a fact there's a default index. If unsure, use the collection-indexes tool to determine the index name.
38+
- The \`$search\` stage supports multiple operators, such as 'autocomplete', 'text', 'geoWithin', and others. Choose the appropriate operator based on the user's query. If unsure of the exact syntax, consult the MongoDB Atlas Search documentation, which can be found here: https://www.mongodb.com/docs/atlas/atlas-search/operators-and-collectors/
3539
`;
3640

3741
const genericPipelineDescription = "An array of aggregation stages to execute.";

tests/accuracy/aggregate.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,4 +421,27 @@ describeAccuracyTests([
421421
},
422422
},
423423
},
424+
{
425+
prompt: "Run a $search query on mflix.movies to find all movies that mention 'space travel' in the plot or title. Use the default search index.",
426+
expectedToolCalls: [
427+
{
428+
toolName: "aggregate",
429+
parameters: {
430+
database: "mflix",
431+
collection: "movies",
432+
pipeline: [
433+
{
434+
$search: {
435+
index: Matcher.anyOf(Matcher.undefined, Matcher.value("default")),
436+
text: {
437+
query: "space travel",
438+
path: ["plot", "title"],
439+
},
440+
},
441+
},
442+
],
443+
},
444+
},
445+
],
446+
},
424447
]);

0 commit comments

Comments
 (0)