• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mongodb-js / mongodb-mcp-server / 18710860243

22 Oct 2025 08:56AM UTC coverage: 81.332% (-0.6%) from 81.905%
18710860243

Pull #662

github

web-flow
Merge 89a556dbd into 17b595b2f
Pull Request #662: chore: When querying with vectorSearch use the generated embeddings MCP-245

1366 of 1859 branches covered (73.48%)

Branch coverage included in aggregate %.

97 of 182 new or added lines in 4 files covered. (53.3%)

8 existing lines in 2 files now uncovered.

6289 of 7553 relevant lines covered (83.26%)

146.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.44
/src/common/search/vectorSearchEmbeddingsManager.ts
1
import type { NodeDriverServiceProvider } from "@mongosh/service-provider-node-driver";
2
import { BSON, type Document } from "bson";
6✔
3
import type { UserConfig } from "../config.js";
4
import type { ConnectionManager } from "../connectionManager.js";
5
import z from "zod";
6✔
6
import { ErrorCodes, MongoDBError } from "../errors.js";
6✔
7
import { getEmbeddingsProvider } from "./embeddingsProvider.js";
6✔
8
import type { EmbeddingParameters, SupportedEmbeddingParameters } from "./embeddingsProvider.js";
9

10
/** Zod enum of the similarity values a vector field index definition may declare. */
export const similarityEnum = z.enum(["cosine", "euclidean", "dotProduct"]);

/** String-literal union inferred from {@link similarityEnum}. */
export type Similarity = z.infer<typeof similarityEnum>;

/** Zod enum of the quantization values a vector field index definition may declare. */
export const quantizationEnum = z.enum(["none", "scalar", "binary"]);

/** String-literal union inferred from {@link quantizationEnum}. */
export type Quantization = z.infer<typeof quantizationEnum>;

/** Shape of a single `type: "vector"` entry inside a vector search index definition. */
export type VectorFieldIndexDefinition = {
    type: "vector";
    /** Dot-separated path of the indexed field inside a document. */
    path: string;
    numDimensions: number;
    quantization: Quantization;
    similarity: Similarity;
};

/** Reasons a document value can fail validation against a vector field definition. */
type VectorFieldValidationErrorKind = "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric";

/**
 * Describes why the value found at `path` does not match the indexed vector field.
 * The `actual*` members are `"unknown"` when they could not be determined from the value.
 */
export type VectorFieldValidationError = {
    path: string;
    expectedNumDimensions: number;
    expectedQuantization: Quantization;
    actualNumDimensions: "unknown" | number;
    actualQuantization: "unknown" | Quantization;
    error: VectorFieldValidationErrorKind;
};

/** Cache key in the form `<database>.<collection>`. */
export type EmbeddingNamespace = `${string}.${string}`;
34
/**
 * Keeps a per-namespace cache of the vector fields declared by vector search
 * indexes, validates documents against those fields, and generates embeddings
 * through the configured embeddings provider.
 *
 * The cache is emptied every time the connection manager emits
 * `"connection-close"`.
 */
export class VectorSearchEmbeddingsManager {
    /**
     * @param config User configuration; `disableEmbeddingsValidation` is read here
     *   and the whole object is handed to `getEmbeddingsProvider`.
     * @param connectionManager Source of the current connection state and of the
     *   `"connection-close"` event that invalidates the cache.
     * @param embeddings Backing cache, keyed by `<database>.<collection>`.
     */
    constructor(
        private readonly config: UserConfig,
        private readonly connectionManager: ConnectionManager,
        private readonly embeddings: Map<EmbeddingNamespace, VectorFieldIndexDefinition[]> = new Map()
    ) {
        connectionManager.events.on("connection-close", () => {
            this.embeddings.clear();
        });
    }

    /**
     * Drops the cached vector field definitions of a single namespace so that the
     * next lookup queries the search indexes again.
     */
    cleanupEmbeddingsForNamespace({ database, collection }: { database: string; collection: string }): void {
        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        this.embeddings.delete(embeddingDefKey);
    }

    /**
     * Returns the vector fields declared by the `vectorSearch` indexes of a namespace.
     *
     * Results are cached per namespace. An empty array is returned, without
     * touching the cache, when search is not available on the current connection
     * or when `config.disableEmbeddingsValidation` is set.
     */
    async embeddingsForNamespace({
        database,
        collection,
    }: {
        database: string;
        collection: string;
    }): Promise<VectorFieldIndexDefinition[]> {
        const provider = await this.assertAtlasSearchIsAvailable();
        if (!provider) {
            return [];
        }

        // We only need the embeddings for validation now, so don't query them if
        // validation is disabled.
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        const definition = this.embeddings.get(embeddingDefKey);

        if (!definition) {
            const allSearchIndexes = await provider.getSearchIndexes(database, collection);
            const vectorSearchIndexes = allSearchIndexes.filter((index) => index.type === "vectorSearch");
            const vectorFields = vectorSearchIndexes
                // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
                .flatMap<Document>((index) => (index.latestDefinition?.fields as Document[]) ?? [])
                .filter((field) => this.isVectorFieldIndexDefinition(field));

            this.embeddings.set(embeddingDefKey, vectorFields);
            return vectorFields;
        }

        return definition;
    }

    /**
     * Validates `document` against every indexed vector field of the namespace.
     *
     * @returns One entry per indexed vector field whose value in `document` fails
     *   validation. Fields that are absent from the document are not reported.
     *   The result is empty when search is unavailable or validation is disabled.
     */
    async findFieldsWithWrongEmbeddings(
        {
            database,
            collection,
        }: {
            database: string;
            collection: string;
        },
        document: Document
    ): Promise<VectorFieldValidationError[]> {
        const provider = await this.assertAtlasSearchIsAvailable();
        if (!provider) {
            return [];
        }

        // While we can do our best effort to ensure that the embedding validation is correct
        // based on https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/
        // it's a complex process so we will also give the user the ability to disable this validation
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const embeddings = await this.embeddingsForNamespace({ database, collection });
        return embeddings
            .map((emb) => this.getValidationErrorForDocument(emb, document))
            .filter((e) => e !== undefined);
    }

    /**
     * Resolves to the service provider of the current connection when the
     * connection state is `"connected"` and reports search support; otherwise `null`.
     */
    private async assertAtlasSearchIsAvailable(): Promise<NodeDriverServiceProvider | null> {
        const connectionState = this.connectionManager.currentConnectionState;
        if (connectionState.tag === "connected" && (await connectionState.isSearchSupported())) {
            return connectionState.serviceProvider;
        }

        return null;
    }

    /** Type guard: only the `type` discriminator is inspected, the other members are trusted. */
    private isVectorFieldIndexDefinition(doc: Document): doc is VectorFieldIndexDefinition {
        return doc["type"] === "vector";
    }

    /**
     * Checks the value stored at `definition.path` inside `document`.
     *
     * @returns `undefined` when the field is missing, when the definition uses
     *   `"none"` quantization, or when the value passes validation; otherwise a
     *   description of the mismatch.
     */
    private getValidationErrorForDocument(
        definition: VectorFieldIndexDefinition,
        document: Document
    ): VectorFieldValidationError | undefined {
        const fieldPath = definition.path.split(".");
        let fieldRef: unknown = document;

        // Fills in the expected values from the definition and defaults every
        // detail the caller could not determine.
        const constructError = (
            details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions" | "actualQuantization">>
        ): VectorFieldValidationError => ({
            path: definition.path,
            expectedNumDimensions: definition.numDimensions,
            expectedQuantization: definition.quantization,
            actualNumDimensions: details.actualNumDimensions ?? "unknown",
            actualQuantization: details.actualQuantization ?? "unknown",
            error: details.error ?? "not-a-vector",
        });

        // Walk the dotted path; a document that does not contain the field has
        // nothing to validate.
        for (const field of fieldPath) {
            if (fieldRef && typeof fieldRef === "object" && field in fieldRef) {
                fieldRef = (fieldRef as Record<string, unknown>)[field];
            } else {
                return undefined;
            }
        }

        switch (definition.quantization) {
            // Because quantization is not defined by the user
            // we have to trust them in the format they use.
            case "none":
                return undefined;
            case "scalar":
            case "binary":
                if (fieldRef instanceof BSON.Binary) {
                    try {
                        const elements = fieldRef.toFloat32Array();
                        if (elements.length !== definition.numDimensions) {
                            return constructError({
                                actualNumDimensions: elements.length,
                                actualQuantization: "binary",
                                error: "dimension-mismatch",
                            });
                        }

                        return undefined;
                    } catch {
                        // bits are also supported
                        try {
                            const bits = fieldRef.toBits();
                            if (bits.length !== definition.numDimensions) {
                                return constructError({
                                    actualNumDimensions: bits.length,
                                    actualQuantization: "binary",
                                    error: "dimension-mismatch",
                                });
                            }

                            return undefined;
                        } catch {
                            // Neither decoder accepted the binary payload.
                            return constructError({
                                actualQuantization: "binary",
                                error: "not-a-vector",
                            });
                        }
                    }
                } else {
                    // Anything that is not a BSON binary must be an array of
                    // numeric values with the indexed number of dimensions.
                    if (!Array.isArray(fieldRef)) {
                        return constructError({
                            error: "not-a-vector",
                        });
                    }

                    if (fieldRef.length !== definition.numDimensions) {
                        return constructError({
                            actualNumDimensions: fieldRef.length,
                            actualQuantization: "scalar",
                            error: "dimension-mismatch",
                        });
                    }

                    if (!fieldRef.every((e) => this.isANumber(e))) {
                        return constructError({
                            actualNumDimensions: fieldRef.length,
                            actualQuantization: "scalar",
                            error: "not-numeric",
                        });
                    }
                }

                break;
        }

        return undefined;
    }

    /**
     * Generates embeddings for `rawValues` with the configured embeddings provider.
     *
     * Unless `config.disableEmbeddingsValidation` is set, the call first verifies
     * that a vector search index covers `path` in the given namespace.
     *
     * @throws MongoDBError with `ErrorCodes.AtlasSearchNotSupported` when search is
     *   not available on the current connection.
     * @throws MongoDBError with `ErrorCodes.NoEmbeddingsProviderConfigured` when
     *   `getEmbeddingsProvider` yields nothing for the current configuration.
     * @throws MongoDBError with `ErrorCodes.AtlasVectorSearchIndexNotFound` when
     *   validation is enabled and no indexed vector field matches `path`.
     */
    public async generateEmbeddings({
        database,
        collection,
        path,
        rawValues,
        embeddingParameters,
        inputType,
    }: {
        database: string;
        collection: string;
        path: string;
        rawValues: string[];
        embeddingParameters: SupportedEmbeddingParameters;
        inputType: EmbeddingParameters["inputType"];
    }): Promise<unknown[]> {
        const provider = await this.assertAtlasSearchIsAvailable();
        if (!provider) {
            throw new MongoDBError(
                ErrorCodes.AtlasSearchNotSupported,
                "Atlas Search is not supported in this cluster."
            );
        }

        const embeddingsProvider = getEmbeddingsProvider(this.config);

        if (!embeddingsProvider) {
            throw new MongoDBError(ErrorCodes.NoEmbeddingsProviderConfigured, "No embeddings provider configured.");
        }

        // embeddingsForNamespace() always yields an empty list when validation is
        // disabled, so running the lookup in that mode would reject every path
        // with AtlasVectorSearchIndexNotFound even though the index may exist.
        if (!this.config.disableEmbeddingsValidation) {
            const embeddingInfoForCollection = await this.embeddingsForNamespace({ database, collection });
            const embeddingInfoForPath = embeddingInfoForCollection.find((definition) => definition.path === path);

            if (!embeddingInfoForPath) {
                throw new MongoDBError(
                    ErrorCodes.AtlasVectorSearchIndexNotFound,
                    `No Vector Search index found for path "${path}" in namespace "${database}.${collection}"`
                );
            }
        }

        return await embeddingsProvider.embed(embeddingParameters.model, rawValues, {
            inputType,
            ...embeddingParameters,
        });
    }

    /** True for JavaScript numbers and for BSON `Int32`, `Decimal128`, `Double` and `Long` instances. */
    private isANumber(value: unknown): boolean {
        if (typeof value === "number") {
            return true;
        }

        if (
            value instanceof BSON.Int32 ||
            value instanceof BSON.Decimal128 ||
            value instanceof BSON.Double ||
            value instanceof BSON.Long
        ) {
            return true;
        }

        return false;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc