• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mongodb-js / mongodb-mcp-server / 18978566012

31 Oct 2025 04:18PM UTC coverage: 80.142% (+0.2%) from 79.922%
18978566012

Pull #653

github

web-flow
Merge f636ea300 into f56f77206
Pull Request #653: chore: update atlas tools output to json - MCP-264

1349 of 1803 branches covered (74.82%)

Branch coverage included in aggregate %.

37 of 60 new or added lines in 6 files covered. (61.67%)

46 existing lines in 7 files now uncovered.

6428 of 7901 relevant lines covered (81.36%)

70.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.21
/src/common/search/vectorSearchEmbeddingsManager.ts
1
import type { NodeDriverServiceProvider } from "@mongosh/service-provider-node-driver";
2
import { BSON, type Document } from "bson";
3✔
3
import type { UserConfig } from "../config.js";
4
import type { ConnectionManager } from "../connectionManager.js";
5
import z from "zod";
3✔
6
import { ErrorCodes, MongoDBError } from "../errors.js";
3✔
7
import { getEmbeddingsProvider } from "./embeddingsProvider.js";
3✔
8
import type { EmbeddingParameters, SupportedEmbeddingParameters } from "./embeddingsProvider.js";
9
import { formatUntrustedData } from "../../tools/tool.js";
3✔
10
import type { Similarity } from "../schemas.js";
11

12
/** Zod schema listing the quantization modes accepted for a vector index field. */
export const quantizationEnum = z.enum(["none", "scalar", "binary"]);

/** Union of the literal values accepted by {@link quantizationEnum}. */
export type Quantization = z.infer<typeof quantizationEnum>;

/** Shape of a single `type: "vector"` field entry inside a vector search index definition. */
export type VectorFieldIndexDefinition = {
    /** Discriminator; always the literal `"vector"` for this shape. */
    type: "vector";
    /** Dot-separated path of the indexed field inside a document. */
    path: string;
    /** Number of dimensions declared for the field by the index. */
    numDimensions: number;
    /** Quantization mode declared for the field by the index. */
    quantization: Quantization;
    /** Similarity function declared for the field by the index. */
    similarity: Similarity;
};

/** Describes why a document's value at an indexed path does not match the index definition. */
export type VectorFieldValidationError = {
    /** Path of the offending field, as declared by the index. */
    path: string;
    /** Dimensions the index expects. */
    expectedNumDimensions: number;
    /** Quantization the index expects. */
    expectedQuantization: Quantization;
    /** Dimensions found in the document, or `"unknown"` when they could not be determined. */
    actualNumDimensions: number | "unknown";
    /** Quantization inferred from the document value, or `"unknown"` when it could not be determined. */
    actualQuantization: Quantization | "unknown";
    /** Category of the mismatch. */
    error: "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric";
};

/** Namespace key in the form `<database>.<collection>`. */
export type EmbeddingNamespace = `${string}.${string}`;
33
/**
 * Keeps a per-namespace cache of vector search index field definitions and uses it to
 * validate documents and to check that a vector index exists for a given path. It also
 * forwards embedding generation requests to the configured embeddings provider.
 *
 * The cache is emptied whenever the connection manager emits `"connection-close"`.
 */
export class VectorSearchEmbeddingsManager {
    /**
     * @param config - User configuration; `disableEmbeddingsValidation` is read from it and the
     * whole object is handed to the embeddings provider factory.
     * @param connectionManager - Source of the current connection state and of the
     * `"connection-close"` event.
     * @param embeddings - Cache of vector field definitions keyed by `<database>.<collection>`.
     * @param embeddingsProvider - Factory that resolves the embeddings provider for a config.
     */
    constructor(
        private readonly config: UserConfig,
        private readonly connectionManager: ConnectionManager,
        private readonly embeddings: Map<EmbeddingNamespace, VectorFieldIndexDefinition[]> = new Map(),
        private readonly embeddingsProvider: typeof getEmbeddingsProvider = getEmbeddingsProvider
    ) {
        // Cached definitions belong to the connection they were read from.
        connectionManager.events.on("connection-close", () => {
            this.embeddings.clear();
        });
    }

    /**
     * Drops the cached vector field definitions of a single namespace so that the next lookup
     * queries the search indexes again.
     */
    cleanupEmbeddingsForNamespace({ database, collection }: { database: string; collection: string }): void {
        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        this.embeddings.delete(embeddingDefKey);
    }

    /**
     * Checks whether at least one search index named `indexName` exists in the namespace.
     *
     * @returns `false` when no search-enabled provider is available; otherwise whether the
     * lookup by name returned at least one index.
     */
    async indexExists({
        database,
        collection,
        indexName,
    }: {
        database: string;
        collection: string;
        indexName: string;
    }): Promise<boolean> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return false;
        }

        const searchIndexesWithName = await provider.getSearchIndexes(database, collection, indexName);

        return searchIndexesWithName.length >= 1;
    }

    /**
     * Returns the vector field definitions of every `vectorSearch` index in the namespace,
     * reading them from the cache when present and caching them otherwise.
     *
     * @returns An empty array when no search-enabled provider is available or when embeddings
     * validation is disabled in the config.
     */
    async embeddingsForNamespace({
        database,
        collection,
    }: {
        database: string;
        collection: string;
    }): Promise<VectorFieldIndexDefinition[]> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return [];
        }

        // We only need the embeddings for validation now, so don't query them if
        // validation is disabled.
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        const cachedDefinition = this.embeddings.get(embeddingDefKey);
        if (cachedDefinition) {
            return cachedDefinition;
        }

        const allSearchIndexes = await provider.getSearchIndexes(database, collection);
        const vectorSearchIndexes = allSearchIndexes.filter((index) => index.type === "vectorSearch");
        const vectorFields = vectorSearchIndexes
            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
            .flatMap<Document>((index) => (index.latestDefinition?.fields as Document[]) ?? [])
            .filter((field) => this.isVectorFieldIndexDefinition(field));

        this.embeddings.set(embeddingDefKey, vectorFields);
        return vectorFields;
    }

    /**
     * Validates every document against the vector field definitions of the namespace.
     *
     * @throws MongoDBError with `ErrorCodes.AtlasVectorSearchInvalidQuery` when at least one
     * document holds an incompatible value; the message lists one line per offending field.
     */
    async assertFieldsHaveCorrectEmbeddings(
        { database, collection }: { database: string; collection: string },
        documents: Document[]
    ): Promise<void> {
        const embeddingValidationResults = (
            await Promise.all(
                documents.map((document) => this.findFieldsWithWrongEmbeddings({ database, collection }, document))
            )
        ).flat();

        if (embeddingValidationResults.length === 0) {
            return;
        }

        const embeddingValidationMessages = embeddingValidationResults.map(
            (validation) =>
                `- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions and ${validation.expectedQuantization}` +
                ` quantization, and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions}, ` +
                `actual quantization: ${validation.actualQuantization}. Error: ${validation.error}`
        );

        // Field paths come from user data, so the lines are passed through formatUntrustedData
        // before being joined into the error message.
        throw new MongoDBError(
            ErrorCodes.AtlasVectorSearchInvalidQuery,
            formatUntrustedData("", ...embeddingValidationMessages)
                .map(({ text }) => text)
                .join("\n")
        );
    }

    /**
     * Collects the validation errors of a single document against the vector field
     * definitions of the namespace.
     *
     * @returns An empty array when no search-enabled provider is available, when validation
     * is disabled in the config, or when every indexed field present in the document is valid.
     */
    public async findFieldsWithWrongEmbeddings(
        {
            database,
            collection,
        }: {
            database: string;
            collection: string;
        },
        document: Document
    ): Promise<VectorFieldValidationError[]> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return [];
        }

        // While we can do our best effort to ensure that the embedding validation is correct
        // based on https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/
        // it's a complex process so we will also give the user the ability to disable this validation
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const embeddings = await this.embeddingsForNamespace({ database, collection });
        return (
            embeddings
                .map((emb) => this.getValidationErrorForDocument(emb, document))
                // Explicit type guard so narrowing does not depend on inferred type predicates.
                .filter((e): e is VectorFieldValidationError => e !== undefined)
        );
    }

    /**
     * Resolves the service provider of the current connection, but only when the connection
     * state is `"connected"` and reports that search is supported.
     *
     * @returns The service provider, or `null` otherwise.
     */
    private async atlasSearchEnabledProvider(): Promise<NodeDriverServiceProvider | null> {
        const connectionState = this.connectionManager.currentConnectionState;
        if (connectionState.tag === "connected" && (await connectionState.isSearchSupported())) {
            return connectionState.serviceProvider;
        }

        return null;
    }

    /** Type guard: a field entry counts as a vector field when its `type` is `"vector"`. */
    private isVectorFieldIndexDefinition(doc: Document): doc is VectorFieldIndexDefinition {
        return doc["type"] === "vector";
    }

    /**
     * Validates the value found in `document` at `definition.path` against the definition.
     *
     * @returns `undefined` when the path is absent from the document, when the definition's
     * quantization is `"none"`, or when the value is compatible; otherwise the error details.
     */
    private getValidationErrorForDocument(
        definition: VectorFieldIndexDefinition,
        document: Document
    ): VectorFieldValidationError | undefined {
        // Fills in the expected values from the definition and defaults any missing detail.
        const constructError = (
            details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions" | "actualQuantization">>
        ): VectorFieldValidationError => ({
            path: definition.path,
            expectedNumDimensions: definition.numDimensions,
            expectedQuantization: definition.quantization,
            actualNumDimensions: details.actualNumDimensions ?? "unknown",
            actualQuantization: details.actualQuantization ?? "unknown",
            error: details.error ?? "not-a-vector",
        });

        // Walk the dotted path; a document that does not contain the field has nothing to validate.
        let fieldRef: unknown = document;
        for (const field of definition.path.split(".")) {
            if (fieldRef && typeof fieldRef === "object" && field in fieldRef) {
                fieldRef = (fieldRef as Record<string, unknown>)[field];
            } else {
                return undefined;
            }
        }

        switch (definition.quantization) {
            // Because quantization is not defined by the user
            // we have to trust them in the format they use.
            case "none":
                return undefined;
            case "scalar":
            case "binary":
                if (fieldRef instanceof BSON.Binary) {
                    try {
                        const elements = fieldRef.toFloat32Array();
                        if (elements.length !== definition.numDimensions) {
                            return constructError({
                                actualNumDimensions: elements.length,
                                actualQuantization: "binary",
                                error: "dimension-mismatch",
                            });
                        }

                        return undefined;
                    } catch {
                        // bits are also supported
                        try {
                            const bits = fieldRef.toBits();
                            if (bits.length !== definition.numDimensions) {
                                return constructError({
                                    actualNumDimensions: bits.length,
                                    actualQuantization: "binary",
                                    error: "dimension-mismatch",
                                });
                            }

                            return undefined;
                        } catch {
                            // Neither conversion succeeded, so the binary is not treated as a vector.
                            return constructError({
                                actualQuantization: "binary",
                                error: "not-a-vector",
                            });
                        }
                    }
                } else {
                    if (!Array.isArray(fieldRef)) {
                        return constructError({
                            error: "not-a-vector",
                        });
                    }

                    if (fieldRef.length !== definition.numDimensions) {
                        return constructError({
                            actualNumDimensions: fieldRef.length,
                            actualQuantization: "scalar",
                            error: "dimension-mismatch",
                        });
                    }

                    if (!fieldRef.every((e) => this.isANumber(e))) {
                        return constructError({
                            actualNumDimensions: fieldRef.length,
                            actualQuantization: "scalar",
                            error: "not-numeric",
                        });
                    }
                }

                break;
        }

        return undefined;
    }

    /**
     * Ensures that one of the namespace's vector field definitions targets `path`.
     *
     * @throws MongoDBError with `ErrorCodes.AtlasVectorSearchIndexNotFound` when none does.
     */
    public async assertVectorSearchIndexExists({
        database,
        collection,
        path,
    }: {
        database: string;
        collection: string;
        path: string;
    }): Promise<void> {
        const embeddingInfoForCollection = await this.embeddingsForNamespace({ database, collection });
        const embeddingInfoForPath = embeddingInfoForCollection.find((definition) => definition.path === path);
        if (!embeddingInfoForPath) {
            throw new MongoDBError(
                ErrorCodes.AtlasVectorSearchIndexNotFound,
                `No Vector Search index found for path "${path}" in namespace "${database}.${collection}"`
            );
        }
    }

    /**
     * Generates embeddings for `rawValues` through the configured embeddings provider.
     *
     * @param rawValues - Texts to embed.
     * @param embeddingParameters - Provider parameters; `model` selects the embedding model.
     * @param inputType - Input type forwarded to the provider.
     * @returns Whatever the provider's `embed` call resolves to.
     * @throws MongoDBError with `ErrorCodes.AtlasSearchNotSupported` when no search-enabled
     * provider is available, or with `ErrorCodes.NoEmbeddingsProviderConfigured` when the
     * factory yields no embeddings provider.
     */
    public async generateEmbeddings({
        rawValues,
        embeddingParameters,
        inputType,
    }: {
        rawValues: string[];
        embeddingParameters: SupportedEmbeddingParameters;
        inputType: EmbeddingParameters["inputType"];
    }): Promise<unknown[][]> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            throw new MongoDBError(
                ErrorCodes.AtlasSearchNotSupported,
                "Atlas Search is not supported in this cluster."
            );
        }

        const embeddingsProvider = this.embeddingsProvider(this.config);

        if (!embeddingsProvider) {
            throw new MongoDBError(ErrorCodes.NoEmbeddingsProviderConfigured, "No embeddings provider configured.");
        }

        // The request is the same whether or not embeddings validation is disabled, so a single
        // call covers both modes. `embeddingParameters` is spread after `inputType`, so a key of
        // the same name inside `embeddingParameters` would take precedence.
        return await embeddingsProvider.embed(embeddingParameters.model, rawValues, {
            inputType,
            ...embeddingParameters,
        });
    }

    /** Returns `true` for JS numbers and for BSON `Int32`, `Decimal128`, `Double` and `Long` values. */
    private isANumber(value: unknown): boolean {
        if (typeof value === "number") {
            return true;
        }

        if (
            value instanceof BSON.Int32 ||
            value instanceof BSON.Decimal128 ||
            value instanceof BSON.Double ||
            value instanceof BSON.Long
        ) {
            return true;
        }

        return false;
    }
}
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc