• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mongodb-js / mongodb-mcp-server / 19171504420

07 Nov 2025 02:33PM UTC coverage: 80.106% (-0.05%) from 80.151%
19171504420

Pull #717

github

web-flow
Merge 6c3142b5f into 454e81617
Pull Request #717: chore: adds field embeddings validation for quantization "none" and warn when vectorSearch is not configured correctly

1369 of 1823 branches covered (75.1%)

Branch coverage included in aggregate %.

32 of 38 new or added lines in 3 files covered. (84.21%)

1 existing line in 1 file now uncovered.

6503 of 8004 relevant lines covered (81.25%)

69.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.89
/src/common/search/vectorSearchEmbeddingsManager.ts
1
import type { NodeDriverServiceProvider } from "@mongosh/service-provider-node-driver";
2
import { BSON, type Document } from "bson";
3✔
3
import type { UserConfig } from "../config.js";
4
import type { ConnectionManager } from "../connectionManager.js";
5
import z from "zod";
3✔
6
import { ErrorCodes, MongoDBError } from "../errors.js";
3✔
7
import { getEmbeddingsProvider } from "./embeddingsProvider.js";
3✔
8
import type { EmbeddingParameters } from "../../tools/mongodb/mongodbSchemas.js";
9
import { formatUntrustedData } from "../../tools/tool.js";
3✔
10
import type { Similarity } from "../schemas.js";
11
import type { SupportedEmbeddingParameters } from "../../tools/mongodb/mongodbSchemas.js";
12

13
/** Zod enum listing the quantization settings this module distinguishes. */
export const quantizationEnum = z.enum(["none", "scalar", "binary"]);

/** Union of the literal values accepted by {@link quantizationEnum}. */
export type Quantization = z.infer<typeof quantizationEnum>;

/**
 * A `vector` field entry taken from the definition of a vector search index.
 */
export type VectorFieldIndexDefinition = {
    /** Discriminator that separates vector fields from the other field entries of an index. */
    type: "vector";
    /** Dot-separated path of the field inside a document. */
    path: string;
    /** Number of dimensions a value stored at `path` is expected to have. */
    numDimensions: number;
    /** Quantization configured for the field. */
    quantization: Quantization;
    /** Similarity function configured for the field. */
    similarity: Similarity;
};

/**
 * Describes why the value found at an indexed path is not compatible with the
 * index definition of that path.
 */
export type VectorFieldValidationError = {
    /** Path of the indexed field that failed validation. */
    path: string;
    /** Dimensions declared by the index definition. */
    expectedNumDimensions: number;
    /** Quantization declared by the index definition. */
    expectedQuantization: Quantization;
    /** Dimensions of the provided value, or "unknown" when they could not be determined. */
    actualNumDimensions: number | "unknown";
    /** Quantization attributed to the provided value, or "unknown" when it could not be determined. */
    actualQuantization: Quantization | "unknown";
    /** Kind of incompatibility that was detected. */
    error:
        | "dimension-mismatch"
        | "quantization-mismatch"
        | "not-a-vector"
        | "not-numeric";
};

/** Cache key of a collection, in the form `<database>.<collection>`. */
export type EmbeddingNamespace = `${string}.${string}`;
34
/**
 * Keeps track of the vector fields declared by the vector search indexes of
 * each namespace and validates documents against them.
 *
 * Index definitions are cached per namespace. The cache is dropped whenever
 * the connection manager emits `connection-close`, and single namespaces can
 * be invalidated with {@link cleanupEmbeddingsForNamespace}.
 */
export class VectorSearchEmbeddingsManager {
    /**
     * Index lookups that have been started but not finished yet, keyed by
     * namespace. Concurrent callers share the pending lookup instead of each
     * issuing their own `getSearchIndexes` call while the cache is still cold.
     */
    private readonly pendingEmbeddings = new Map<EmbeddingNamespace, Promise<VectorFieldIndexDefinition[]>>();

    /**
     * @param config - User configuration; `disableEmbeddingsValidation` turns validation off.
     * @param connectionManager - Source of the current connection state and of the `connection-close` event.
     * @param embeddings - Cache of vector field definitions per namespace (injectable, defaults to an empty map).
     * @param embeddingsProvider - Factory resolving the embeddings provider for a configuration.
     */
    constructor(
        private readonly config: UserConfig,
        private readonly connectionManager: ConnectionManager,
        private readonly embeddings: Map<EmbeddingNamespace, VectorFieldIndexDefinition[]> = new Map(),
        private readonly embeddingsProvider: typeof getEmbeddingsProvider = getEmbeddingsProvider
    ) {
        connectionManager.events.on("connection-close", () => {
            // Nothing cached or pending remains meaningful once the connection is gone.
            this.embeddings.clear();
            this.pendingEmbeddings.clear();
        });
    }

    /**
     * Forgets the cached vector field definitions of a single namespace so
     * that the next lookup queries the search indexes again.
     */
    cleanupEmbeddingsForNamespace({ database, collection }: { database: string; collection: string }): void {
        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        this.embeddings.delete(embeddingDefKey);
        // Also detach any lookup still in flight so later callers do not join it.
        this.pendingEmbeddings.delete(embeddingDefKey);
    }

    /**
     * Checks whether at least one search index named `indexName` exists on the
     * given collection.
     *
     * @returns `false` when search is not available on the current connection,
     * otherwise whether `getSearchIndexes` returned at least one index.
     */
    async indexExists({
        database,
        collection,
        indexName,
    }: {
        database: string;
        collection: string;
        indexName: string;
    }): Promise<boolean> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return false;
        }

        const searchIndexesWithName = await provider.getSearchIndexes(database, collection, indexName);

        return searchIndexesWithName.length >= 1;
    }

    /**
     * Returns the vector fields declared by the `vectorSearch` indexes of a
     * namespace, querying the search indexes only when the namespace is not
     * cached yet.
     *
     * @returns An empty array when search is not available on the current
     * connection or when embeddings validation is disabled; otherwise the
     * (possibly empty) list of vector field definitions.
     */
    async embeddingsForNamespace({
        database,
        collection,
    }: {
        database: string;
        collection: string;
    }): Promise<VectorFieldIndexDefinition[]> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return [];
        }

        // We only need the embeddings for validation now, so don't query them if
        // validation is disabled.
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        const definition = this.embeddings.get(embeddingDefKey);
        if (definition) {
            return definition;
        }

        // Callers validating many documents at once all reach this point before
        // the first lookup resolves; let them share that lookup.
        const inFlight = this.pendingEmbeddings.get(embeddingDefKey);
        if (inFlight) {
            return await inFlight;
        }

        const lookup: Promise<VectorFieldIndexDefinition[]> = this.fetchVectorFields(
            provider,
            embeddingDefKey,
            database,
            collection
        ).finally(() => {
            // Only remove our own entry: the map may have been cleared and
            // repopulated by a newer lookup in the meantime.
            if (this.pendingEmbeddings.get(embeddingDefKey) === lookup) {
                this.pendingEmbeddings.delete(embeddingDefKey);
            }
        });
        this.pendingEmbeddings.set(embeddingDefKey, lookup);

        return await lookup;
    }

    /**
     * Validates every document against the vector fields of the namespace.
     *
     * @throws MongoDBError with `ErrorCodes.AtlasVectorSearchInvalidQuery`
     * listing every incompatible field when at least one document carries a
     * value that does not match its index definition.
     */
    async assertFieldsHaveCorrectEmbeddings(
        { database, collection }: { database: string; collection: string },
        documents: Document[]
    ): Promise<void> {
        const embeddingValidationResults = (
            await Promise.all(
                documents.map((document) => this.findFieldsWithWrongEmbeddings({ database, collection }, document))
            )
        ).flat();

        if (embeddingValidationResults.length > 0) {
            const embeddingValidationMessages = embeddingValidationResults.map(
                (validation) =>
                    `- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions and ${validation.expectedQuantization}` +
                    ` quantization, and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions}, ` +
                    `actual quantization: ${validation.actualQuantization}. Error: ${validation.error}`
            );

            throw new MongoDBError(
                ErrorCodes.AtlasVectorSearchInvalidQuery,
                formatUntrustedData("", ...embeddingValidationMessages)
                    .map(({ text }) => text)
                    .join("\n")
            );
        }
    }

    /**
     * Collects the validation errors of a single document.
     *
     * @returns One entry per indexed vector field whose value in `document` is
     * incompatible with the index; an empty array when search is unavailable,
     * validation is disabled, or nothing is wrong.
     */
    public async findFieldsWithWrongEmbeddings(
        {
            database,
            collection,
        }: {
            database: string;
            collection: string;
        },
        document: Document
    ): Promise<VectorFieldValidationError[]> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return [];
        }

        // While we can do our best effort to ensure that the embedding validation is correct
        // based on https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/
        // it's a complex process so we will also give the user the ability to disable this validation
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const embeddings = await this.embeddingsForNamespace({ database, collection });
        return embeddings
            .map((emb) => this.getValidationErrorForDocument(emb, document))
            .filter((e): e is VectorFieldValidationError => e !== undefined);
    }

    /**
     * Returns the service provider of the current connection when the
     * connection is established and reports search support, `null` otherwise.
     */
    private async atlasSearchEnabledProvider(): Promise<NodeDriverServiceProvider | null> {
        const connectionState = this.connectionManager.currentConnectionState;
        if (connectionState.tag === "connected" && (await connectionState.isSearchSupported())) {
            return connectionState.serviceProvider;
        }

        return null;
    }

    /**
     * Queries the search indexes of a collection, extracts the vector fields
     * of its `vectorSearch` indexes and stores them in the cache under
     * `embeddingDefKey`.
     */
    private async fetchVectorFields(
        provider: NodeDriverServiceProvider,
        embeddingDefKey: EmbeddingNamespace,
        database: string,
        collection: string
    ): Promise<VectorFieldIndexDefinition[]> {
        const allSearchIndexes = await provider.getSearchIndexes(database, collection);
        const vectorSearchIndexes = allSearchIndexes.filter((index) => index.type === "vectorSearch");
        const vectorFields = vectorSearchIndexes
            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
            .flatMap<Document>((index) => (index.latestDefinition?.fields as Document[]) ?? [])
            .filter((field): field is VectorFieldIndexDefinition => this.isVectorFieldIndexDefinition(field));

        this.embeddings.set(embeddingDefKey, vectorFields);
        return vectorFields;
    }

    /**
     * Tells vector fields apart from the other field entries of an index
     * definition. Only the `type` discriminator is inspected.
     */
    private isVectorFieldIndexDefinition(doc: Document): doc is VectorFieldIndexDefinition {
        return doc["type"] === "vector";
    }

    /**
     * Validates the value stored at `definition.path` in `document`.
     *
     * @returns `undefined` when the document has no value at that path or the
     * value is compatible with the definition, otherwise the validation error.
     */
    private getValidationErrorForDocument(
        definition: VectorFieldIndexDefinition,
        document: Document
    ): VectorFieldValidationError | undefined {
        // Walk the dotted path; a document that does not contain the field is
        // not subject to validation.
        let fieldRef: unknown = document;
        for (const field of definition.path.split(".")) {
            if (fieldRef && typeof fieldRef === "object" && field in fieldRef) {
                fieldRef = (fieldRef as Record<string, unknown>)[field];
            } else {
                return undefined;
            }
        }

        switch (definition.quantization) {
            // Quantization "none" means no quantization is performed, so
            // full-fidelity vectors are stored therefore the underlying vector
            // must be stored as an array of numbers having the same dimension
            // as that of the index.
            case "none":
                return this.getArrayVectorError(definition, fieldRef, "none");
            case "scalar":
            case "binary":
                if (fieldRef instanceof BSON.Binary) {
                    return this.getBinaryVectorError(definition, fieldRef);
                }

                return this.getArrayVectorError(definition, fieldRef, "scalar");
            default:
                // Definitions carrying any other quantization value are not validated.
                return undefined;
        }
    }

    /**
     * Validates a value that is expected to be a plain array of numbers.
     *
     * @param actualQuantization - Quantization to report for the value once it
     * is known to be an array.
     */
    private getArrayVectorError(
        definition: VectorFieldIndexDefinition,
        value: unknown,
        actualQuantization: Quantization
    ): VectorFieldValidationError | undefined {
        if (!Array.isArray(value)) {
            return this.buildValidationError(definition, { error: "not-a-vector" });
        }

        if (value.length !== definition.numDimensions) {
            return this.buildValidationError(definition, {
                actualNumDimensions: value.length,
                actualQuantization,
                error: "dimension-mismatch",
            });
        }

        if (value.some((element) => !this.isANumber(element))) {
            return this.buildValidationError(definition, {
                actualNumDimensions: value.length,
                actualQuantization,
                error: "not-numeric",
            });
        }

        return undefined;
    }

    /**
     * Validates a BSON binary value by decoding it first as a float32 vector
     * and, when that throws, as a vector of bits.
     *
     * NOTE(review): a binary value that decodes as neither is reported as
     * "not-a-vector"; confirm whether other binary vector encodings should be
     * accepted here as well.
     */
    private getBinaryVectorError(
        definition: VectorFieldIndexDefinition,
        vector: BSON.Binary
    ): VectorFieldValidationError | undefined {
        let actualNumDimensions: number;
        try {
            actualNumDimensions = vector.toFloat32Array().length;
        } catch {
            // bits are also supported
            try {
                actualNumDimensions = vector.toBits().length;
            } catch {
                return this.buildValidationError(definition, {
                    actualQuantization: "binary",
                    error: "not-a-vector",
                });
            }
        }

        if (actualNumDimensions !== definition.numDimensions) {
            return this.buildValidationError(definition, {
                actualNumDimensions,
                actualQuantization: "binary",
                error: "dimension-mismatch",
            });
        }

        return undefined;
    }

    /**
     * Assembles a validation error for `definition`, falling back to
     * "unknown" dimensions/quantization and to the "not-a-vector" error for
     * every detail that is not provided.
     */
    private buildValidationError(
        definition: VectorFieldIndexDefinition,
        details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions" | "actualQuantization">>
    ): VectorFieldValidationError {
        return {
            path: definition.path,
            expectedNumDimensions: definition.numDimensions,
            expectedQuantization: definition.quantization,
            actualNumDimensions: details.actualNumDimensions ?? "unknown",
            actualQuantization: details.actualQuantization ?? "unknown",
            error: details.error ?? "not-a-vector",
        };
    }

    /**
     * Ensures that some vector search index of the namespace covers `path`.
     *
     * @throws MongoDBError with `ErrorCodes.AtlasVectorSearchIndexNotFound`
     * when no known vector field definition has that exact path.
     */
    public async assertVectorSearchIndexExists({
        database,
        collection,
        path,
    }: {
        database: string;
        collection: string;
        path: string;
    }): Promise<void> {
        const embeddingInfoForCollection = await this.embeddingsForNamespace({ database, collection });
        const embeddingInfoForPath = embeddingInfoForCollection.find((definition) => definition.path === path);
        if (!embeddingInfoForPath) {
            throw new MongoDBError(
                ErrorCodes.AtlasVectorSearchIndexNotFound,
                `No Vector Search index found for path "${path}" in namespace "${database}.${collection}"`
            );
        }
    }

    /**
     * Generates embeddings for `rawValues` with the configured embeddings
     * provider.
     *
     * @throws MongoDBError with `ErrorCodes.AtlasSearchNotSupported` when
     * search is not available on the current connection.
     * @throws MongoDBError with `ErrorCodes.NoEmbeddingsProviderConfigured`
     * when no embeddings provider can be resolved from the configuration.
     */
    public async generateEmbeddings({
        rawValues,
        embeddingParameters,
        inputType,
    }: {
        rawValues: string[];
        embeddingParameters: SupportedEmbeddingParameters;
        inputType: EmbeddingParameters["inputType"];
    }): Promise<unknown[][]> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            throw new MongoDBError(
                ErrorCodes.AtlasSearchNotSupported,
                "Atlas Search is not supported in this cluster."
            );
        }

        const embeddingsProvider = this.embeddingsProvider(this.config);

        if (!embeddingsProvider) {
            throw new MongoDBError(ErrorCodes.NoEmbeddingsProviderConfigured, "No embeddings provider configured.");
        }

        // The request is the same whether or not embeddings validation is
        // disabled, so no branching on the configuration is needed here.
        return await embeddingsProvider.embed(embeddingParameters.model, rawValues, {
            inputType,
            ...embeddingParameters,
        });
    }

    /**
     * Tells whether a value is a JavaScript number or one of the BSON numeric
     * wrappers (Int32, Decimal128, Double, Long).
     */
    private isANumber(value: unknown): boolean {
        return (
            typeof value === "number" ||
            value instanceof BSON.Int32 ||
            value instanceof BSON.Decimal128 ||
            value instanceof BSON.Double ||
            value instanceof BSON.Long
        );
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc