• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mongodb-js / mongodb-mcp-server / 18871399446

28 Oct 2025 10:15AM UTC coverage: 80.225% (+0.1%) from 80.103%
18871399446

Pull #693

github

web-flow
Merge f0d4a3804 into 34c9c68ca
Pull Request #693: chore: check that a vector search index exists with indexCheck

1353 of 1827 branches covered (74.06%)

Branch coverage included in aggregate %.

51 of 55 new or added lines in 2 files covered. (92.73%)

28 existing lines in 2 files now uncovered.

6355 of 7781 relevant lines covered (81.67%)

71.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.79
/src/common/search/vectorSearchEmbeddingsManager.ts
1
import type { NodeDriverServiceProvider } from "@mongosh/service-provider-node-driver";
2
import { BSON, type Document } from "bson";
3✔
3
import type { UserConfig } from "../config.js";
4
import type { ConnectionManager } from "../connectionManager.js";
5
import z from "zod";
3✔
6
import { ErrorCodes, MongoDBError } from "../errors.js";
3✔
7
import { getEmbeddingsProvider } from "./embeddingsProvider.js";
3✔
8
import type { EmbeddingParameters, SupportedEmbeddingParameters } from "./embeddingsProvider.js";
9

10
/** Zod enum of the similarity values a vector field definition may declare. */
export const similarityEnum = z.enum(["cosine", "euclidean", "dotProduct"]);
3✔
11
/** Union of the string literals accepted by `similarityEnum`. */
export type Similarity = z.infer<typeof similarityEnum>;
12

13
/** Zod enum of the quantization values a vector field definition may declare. */
export const quantizationEnum = z.enum(["none", "scalar", "binary"]);
3✔
14
/** Union of the string literals accepted by `quantizationEnum`. */
export type Quantization = z.infer<typeof quantizationEnum>;
15

16
/**
 * One field entry of a vector search index definition, restricted to the
 * entries whose `type` is `"vector"`.
 */
export type VectorFieldIndexDefinition = {
    /** Discriminator: only entries tagged `"vector"` match this shape. */
    type: "vector";
    /** Dot-separated path of the indexed field inside a document. */
    path: string;
    /** Number of elements a value stored at `path` is expected to have. */
    numDimensions: number;
    /** Quantization declared for the field. */
    quantization: Quantization;
    /** Similarity function declared for the field. */
    similarity: Similarity;
};
23

24
/**
 * Describes why the value found at an indexed path does not match the vector
 * field definition it was validated against.
 */
export type VectorFieldValidationError = {
    /** Path of the indexed field, copied from the definition. */
    path: string;
    /** Dimension count required by the definition. */
    expectedNumDimensions: number;
    /** Quantization declared by the definition. */
    expectedQuantization: Quantization;
    /** Dimension count observed in the document, or `"unknown"` when it could not be determined. */
    actualNumDimensions: number | "unknown";
    /** Quantization attributed to the observed value, or `"unknown"` when it could not be determined. */
    actualQuantization: Quantization | "unknown";
    /** Kind of mismatch that was detected. */
    error: "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric";
};
32

33
/** `<database>.<collection>` string used as the key of the cached vector field definitions. */
export type EmbeddingNamespace = `${string}.${string}`;
34
/**
 * Keeps a per-namespace cache of the vector fields declared by vector search
 * indexes and uses it to validate documents and to guard embedding generation.
 *
 * The cache is emptied every time the connection manager emits
 * `"connection-close"`.
 */
export class VectorSearchEmbeddingsManager {
    constructor(
        private readonly config: UserConfig,
        private readonly connectionManager: ConnectionManager,
        private readonly embeddings: Map<EmbeddingNamespace, VectorFieldIndexDefinition[]> = new Map(),
        private readonly embeddingsProvider: typeof getEmbeddingsProvider = getEmbeddingsProvider
    ) {
        // Forget every cached definition once the connection closes.
        const dropCachedDefinitions = (): void => {
            this.embeddings.clear();
        };
        this.connectionManager.events.on("connection-close", dropCachedDefinitions);
    }

    /** Builds the `<database>.<collection>` key used by the definitions cache. */
    private static namespaceKey(database: string, collection: string): EmbeddingNamespace {
        return `${database}.${collection}`;
    }

    /**
     * Removes the cached vector field definitions of a single namespace.
     *
     * @param namespace - Database and collection whose cache entry is dropped.
     */
    cleanupEmbeddingsForNamespace({ database, collection }: { database: string; collection: string }): void {
        this.embeddings.delete(VectorSearchEmbeddingsManager.namespaceKey(database, collection));
    }

    /**
     * Checks whether a search index with the given name exists on a collection.
     *
     * @returns `false` when no search-capable provider is available, otherwise
     * whether the lookup by name returned at least one index.
     */
    async indexExists({
        database,
        collection,
        indexName,
    }: {
        database: string;
        collection: string;
        indexName: string;
    }): Promise<boolean> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return false;
        }

        const matchingIndexes = await provider.getSearchIndexes(database, collection, indexName);
        return matchingIndexes.length > 0;
    }

    /**
     * Returns the vector field definitions of every `vectorSearch` index on the
     * namespace, reading them from the cache when an entry is present and
     * caching them otherwise.
     *
     * @returns An empty list when search is unavailable or when embeddings
     * validation is disabled in the configuration.
     */
    async embeddingsForNamespace({
        database,
        collection,
    }: {
        database: string;
        collection: string;
    }): Promise<VectorFieldIndexDefinition[]> {
        const provider = await this.atlasSearchEnabledProvider();
        // The definitions are only needed for validation, so they are not
        // queried when search is unavailable or validation is disabled.
        if (!provider || this.config.disableEmbeddingsValidation) {
            return [];
        }

        const cacheKey = VectorSearchEmbeddingsManager.namespaceKey(database, collection);
        const cached = this.embeddings.get(cacheKey);
        if (cached) {
            return cached;
        }

        const searchIndexes = await provider.getSearchIndexes(database, collection);
        const vectorFields = searchIndexes
            .filter((searchIndex) => searchIndex.type === "vectorSearch")
            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
            .flatMap<Document>((searchIndex) => (searchIndex.latestDefinition?.fields as Document[]) ?? [])
            .filter((field) => this.isVectorFieldIndexDefinition(field));

        this.embeddings.set(cacheKey, vectorFields);
        return vectorFields;
    }

    /**
     * Validates a document against every vector field definition known for
     * the namespace.
     *
     * @param namespace - Database and collection the document belongs to.
     * @param document - Document to inspect.
     * @returns One entry per indexed field whose value fails validation; empty
     * when search is unavailable or validation is disabled.
     */
    async findFieldsWithWrongEmbeddings(
        {
            database,
            collection,
        }: {
            database: string;
            collection: string;
        },
        document: Document
    ): Promise<VectorFieldValidationError[]> {
        const provider = await this.atlasSearchEnabledProvider();
        if (!provider) {
            return [];
        }

        // Validation follows
        // https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/
        // on a best-effort basis; because the process is complex, users may opt out of it.
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const validationErrors: VectorFieldValidationError[] = [];
        for (const fieldDefinition of await this.embeddingsForNamespace({ database, collection })) {
            const validationError = this.getValidationErrorForDocument(fieldDefinition, document);
            if (validationError !== undefined) {
                validationErrors.push(validationError);
            }
        }
        return validationErrors;
    }

    /**
     * Resolves the service provider of the current connection, but only when
     * the connection is established and reports search support.
     *
     * @returns The provider, or `null` when disconnected or search is unsupported.
     */
    private async atlasSearchEnabledProvider(): Promise<NodeDriverServiceProvider | null> {
        const state = this.connectionManager.currentConnectionState;
        if (state.tag !== "connected") {
            return null;
        }

        const searchSupported = await state.isSearchSupported();
        return searchSupported ? state.serviceProvider : null;
    }

    /** Type guard: an index field entry is a vector definition when its `type` is `"vector"`. */
    private isVectorFieldIndexDefinition(doc: Document): doc is VectorFieldIndexDefinition {
        return doc["type"] === "vector";
    }

    /**
     * Validates the value stored at `definition.path` inside `document`.
     *
     * @returns `undefined` when the path is absent from the document, when the
     * definition's quantization is neither `"scalar"` nor `"binary"`, or when
     * the value passes every check; otherwise a description of the mismatch.
     */
    private getValidationErrorForDocument(
        definition: VectorFieldIndexDefinition,
        document: Document
    ): VectorFieldValidationError | undefined {
        // Fills in the expected values from the definition and defaults any
        // detail the caller could not determine.
        const buildError = ({
            error = "not-a-vector",
            actualNumDimensions = "unknown",
            actualQuantization = "unknown",
        }: Partial<
            Pick<VectorFieldValidationError, "error" | "actualNumDimensions" | "actualQuantization">
        >): VectorFieldValidationError => ({
            path: definition.path,
            expectedNumDimensions: definition.numDimensions,
            expectedQuantization: definition.quantization,
            actualNumDimensions,
            actualQuantization,
            error,
        });

        // Walk the dotted path; a missing segment means there is nothing to validate.
        let fieldRef: unknown = document;
        for (const segment of definition.path.split(".")) {
            if (!fieldRef || typeof fieldRef !== "object" || !(segment in fieldRef)) {
                return undefined;
            }
            fieldRef = (fieldRef as Record<string, unknown>)[segment];
        }

        // With "none" the quantization is not defined by the user, so the
        // stored format is trusted as-is. Only "scalar" and "binary" are checked.
        const { quantization } = definition;
        if (quantization !== "scalar" && quantization !== "binary") {
            return undefined;
        }

        if (fieldRef instanceof BSON.Binary) {
            let observedDimensions: number;
            try {
                observedDimensions = fieldRef.toFloat32Array().length;
            } catch {
                // bits are also supported
                try {
                    observedDimensions = fieldRef.toBits().length;
                } catch {
                    return buildError({
                        actualQuantization: "binary",
                        error: "not-a-vector",
                    });
                }
            }

            if (observedDimensions === definition.numDimensions) {
                return undefined;
            }
            return buildError({
                actualNumDimensions: observedDimensions,
                actualQuantization: "binary",
                error: "dimension-mismatch",
            });
        }

        if (!Array.isArray(fieldRef)) {
            return buildError({
                error: "not-a-vector",
            });
        }

        if (fieldRef.length !== definition.numDimensions) {
            return buildError({
                actualNumDimensions: fieldRef.length,
                actualQuantization: "scalar",
                error: "dimension-mismatch",
            });
        }

        if (fieldRef.some((element) => !this.isANumber(element))) {
            return buildError({
                actualNumDimensions: fieldRef.length,
                actualQuantization: "scalar",
                error: "not-numeric",
            });
        }

        return undefined;
    }

    /**
     * Generates embeddings for `rawValues` with the configured embeddings provider.
     *
     * Unless embeddings validation is disabled, a vector field definition for
     * `path` must exist in the namespace before anything is embedded.
     *
     * @throws MongoDBError with `ErrorCodes.AtlasSearchNotSupported` when no
     * search-capable provider is available.
     * @throws MongoDBError with `ErrorCodes.NoEmbeddingsProviderConfigured` when
     * no embeddings provider can be obtained from the configuration.
     * @throws MongoDBError with `ErrorCodes.AtlasVectorSearchIndexNotFound` when
     * validation is enabled and no definition covers `path`.
     */
    public async generateEmbeddings({
        database,
        collection,
        path,
        rawValues,
        embeddingParameters,
        inputType,
    }: {
        database: string;
        collection: string;
        path: string;
        rawValues: string[];
        embeddingParameters: SupportedEmbeddingParameters;
        inputType: EmbeddingParameters["inputType"];
    }): Promise<unknown[]> {
        if (!(await this.atlasSearchEnabledProvider())) {
            throw new MongoDBError(
                ErrorCodes.AtlasSearchNotSupported,
                "Atlas Search is not supported in this cluster."
            );
        }

        const embeddingsProvider = this.embeddingsProvider(this.config);
        if (!embeddingsProvider) {
            throw new MongoDBError(ErrorCodes.NoEmbeddingsProviderConfigured, "No embeddings provider configured.");
        }

        if (!this.config.disableEmbeddingsValidation) {
            const indexedFields = await this.embeddingsForNamespace({ database, collection });
            if (!indexedFields.some((definition) => definition.path === path)) {
                throw new MongoDBError(
                    ErrorCodes.AtlasVectorSearchIndexNotFound,
                    `No Vector Search index found for path "${path}" in namespace "${database}.${collection}"`
                );
            }
        }

        return await embeddingsProvider.embed(embeddingParameters.model, rawValues, {
            inputType,
            ...embeddingParameters,
        });
    }

    /** Tells whether a value is a JS number or one of the BSON numeric wrappers (Int32, Decimal128, Double, Long). */
    private isANumber(value: unknown): boolean {
        return (
            typeof value === "number" ||
            value instanceof BSON.Int32 ||
            value instanceof BSON.Decimal128 ||
            value instanceof BSON.Double ||
            value instanceof BSON.Long
        );
    }
}
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc