Skip to content

Commit 5cd2e51

Browse files
chore: validate when quantization is 'none'
1 parent 198b36e commit 5cd2e51

File tree

2 files changed

+98
-44
lines changed

2 files changed

+98
-44
lines changed

src/common/search/vectorSearchEmbeddingsManager.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,33 @@ export class VectorSearchEmbeddingsManager {
198198
}
199199

200200
switch (definition.quantization) {
201-
// Because quantization is not defined by the user
202-
// we have to trust them in the format they use.
201+
// Quantization "none" means no quantization is performed, so
202+
// full-fidelity vectors are stored; therefore, the underlying vector
203+
// must be stored as an array of numbers having the same dimension
204+
// as that of the index.
203205
case "none":
206+
if (!Array.isArray(fieldRef)) {
207+
return constructError({
208+
error: "not-a-vector",
209+
});
210+
}
211+
212+
if (fieldRef.length !== definition.numDimensions) {
213+
return constructError({
214+
actualNumDimensions: fieldRef.length,
215+
actualQuantization: "none",
216+
error: "dimension-mismatch",
217+
});
218+
}
219+
220+
if (!fieldRef.every((e) => this.isANumber(e))) {
221+
return constructError({
222+
actualNumDimensions: fieldRef.length,
223+
actualQuantization: "none",
224+
error: "not-numeric",
225+
});
226+
}
227+
204228
return undefined;
205229
case "scalar":
206230
case "binary":

tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts

Lines changed: 72 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ const embeddingConfig: Map<EmbeddingNamespace, VectorFieldIndexDefinition[]> = n
4242
[
4343
mapKey,
4444
[
45+
{
46+
type: "vector",
47+
path: "embedding_field_wo_quantization",
48+
numDimensions: 8,
49+
quantization: "none",
50+
similarity: "euclidean",
51+
},
4552
{
4653
type: "vector",
4754
path: "embedding_field",
@@ -278,51 +285,74 @@ describe("VectorSearchEmbeddingsManager", () => {
278285
expect(result).toHaveLength(0);
279286
});
280287

281-
it("documents inserting the field with wrong type are invalid", async () => {
282-
const result = await embeddings.findFieldsWithWrongEmbeddings(
283-
{ database, collection },
284-
{ embedding_field: "some text" }
285-
);
286-
287-
expect(result).toHaveLength(1);
288-
});
288+
it.each(["embedding_field", "embedding_field_wo_quantization"] as const)(
289+
"documents inserting the field with wrong type are invalid - $0",
290+
async (field) => {
291+
const result = await embeddings.findFieldsWithWrongEmbeddings(
292+
{ database, collection },
293+
{ [field]: "some text" }
294+
);
289295

290-
it("documents inserting the field with wrong dimensions are invalid", async () => {
291-
const result = await embeddings.findFieldsWithWrongEmbeddings(
292-
{ database, collection },
293-
{ embedding_field: [1, 2, 3] }
294-
);
295-
296-
expect(result).toHaveLength(1);
297-
const expectedError: VectorFieldValidationError = {
298-
actualNumDimensions: 3,
299-
actualQuantization: "scalar",
300-
error: "dimension-mismatch",
301-
expectedNumDimensions: 8,
302-
expectedQuantization: "scalar",
303-
path: "embedding_field",
304-
};
305-
expect(result[0]).toEqual(expectedError);
306-
});
296+
expect(result).toHaveLength(1);
297+
}
298+
);
307299

308-
it("documents inserting the field with correct dimensions, but wrong type are invalid", async () => {
309-
const result = await embeddings.findFieldsWithWrongEmbeddings(
310-
{ database, collection },
311-
{ embedding_field: ["1", "2", "3", "4", "5", "6", "7", "8"] }
312-
);
300+
it.each([
301+
{ path: "embedding_field", expectedQuantization: "scalar", actualQuantization: "scalar" },
302+
{
303+
path: "embedding_field_wo_quantization",
304+
expectedQuantization: "none",
305+
actualQuantization: "none",
306+
},
307+
] as const)(
308+
"documents inserting the field with wrong dimensions are invalid - path = $path",
309+
async ({ path, expectedQuantization, actualQuantization }) => {
310+
const result = await embeddings.findFieldsWithWrongEmbeddings(
311+
{ database, collection },
312+
{ [path]: [1, 2, 3] }
313+
);
314+
315+
expect(result).toHaveLength(1);
316+
const expectedError: VectorFieldValidationError = {
317+
actualNumDimensions: 3,
318+
actualQuantization,
319+
error: "dimension-mismatch",
320+
expectedNumDimensions: 8,
321+
expectedQuantization,
322+
path,
323+
};
324+
expect(result[0]).toEqual(expectedError);
325+
}
326+
);
313327

314-
expect(result).toHaveLength(1);
315-
const expectedError: VectorFieldValidationError = {
316-
actualNumDimensions: 8,
317-
actualQuantization: "scalar",
318-
error: "not-numeric",
319-
expectedNumDimensions: 8,
320-
expectedQuantization: "scalar",
321-
path: "embedding_field",
322-
};
323-
324-
expect(result[0]).toEqual(expectedError);
325-
});
328+
it.each([
329+
{ path: "embedding_field", expectedQuantization: "scalar", actualQuantization: "scalar" },
330+
{
331+
path: "embedding_field_wo_quantization",
332+
expectedQuantization: "none",
333+
actualQuantization: "none",
334+
},
335+
] as const)(
336+
"documents inserting the field with correct dimensions, but wrong type are invalid - $path",
337+
async ({ path, expectedQuantization, actualQuantization }) => {
338+
const result = await embeddings.findFieldsWithWrongEmbeddings(
339+
{ database, collection },
340+
{ [path]: ["1", "2", "3", "4", "5", "6", "7", "8"] }
341+
);
342+
343+
expect(result).toHaveLength(1);
344+
const expectedError: VectorFieldValidationError = {
345+
actualNumDimensions: 8,
346+
actualQuantization,
347+
error: "not-numeric",
348+
expectedNumDimensions: 8,
349+
expectedQuantization,
350+
path,
351+
};
352+
353+
expect(result[0]).toEqual(expectedError);
354+
}
355+
);
326356

327357
it("documents inserting the field with correct dimensions and quantization in binary are valid", async () => {
328358
const result = await embeddings.findFieldsWithWrongEmbeddings(

0 commit comments

Comments
 (0)