• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 5646559600

pending completion
5646559600

push

github

web-flow
test: reactivate unit tests in `test_eval.py` (#5255)

* Activate tests that follow unit test and integration test rules

* Adding more integration labels

* Change name to better reflect complexity of test

* Remove mark integration tags, move test to doc store test for add_eval_data

* Removing incorrect integration label

* Deactivated document store test b/c it fails for Weaviate and pinecone

* Remove unit label since test needs to be refactored to be considered a unit test

* Undo changes

* Undo change

* Check every field in the load evaluation result

* Add back label and add skip reason

* Use pytest skip instead of TODO

10748 of 23272 relevant lines covered (46.18%)

2.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

33.99
haystack/testing/document_store.py
1
# pylint: disable=too-many-public-methods
2
import sys
2✔
3

4
import pytest
2✔
5
import numpy as np
2✔
6

7
from haystack.schema import Document, Label, Answer, Span
2✔
8
from haystack.errors import DuplicateDocumentError
2✔
9
from haystack.document_stores import BaseDocumentStore
2✔
10

11

12
@pytest.mark.document_store
class DocumentStoreBaseTestAbstract:
    """
    This is a base class to test abstract methods from DocumentStoreBase to be inherited by any Document Store
    testsuite. It doesn't have the `Test` prefix in the name so that its methods won't be collected for this
    class but only for its subclasses.
    """

    @pytest.fixture
    def documents(self):
        """
        Return 9 documents: for each of 3 names, one "Foo" doc (year 2020) and one
        "Bar" doc (year 2021) with random 768-dim embeddings, plus one doc without
        an embedding. Several tests rely on these exact counts and meta values.
        """
        documents = []
        for i in range(3):
            documents.append(
                Document(
                    content=f"A Foo Document {i}",
                    meta={"name": f"name_{i}", "year": "2020", "month": "01", "numbers": [2, 4]},
                    embedding=np.random.rand(768).astype(np.float32),
                )
            )

            documents.append(
                Document(
                    content=f"A Bar Document {i}",
                    meta={"name": f"name_{i}", "year": "2021", "month": "02", "numbers": [-2, -4]},
                    embedding=np.random.rand(768).astype(np.float32),
                )
            )

            documents.append(
                Document(
                    content=f"Document {i} without embeddings",
                    meta={"name": f"name_{i}", "no_embedding": True, "month": "03"},
                )
            )

        return documents

    @pytest.fixture
    def labels(self, documents):
        """Return one Label per document, mixing origins; the first label has no answer."""
        labels = []
        for i, d in enumerate(documents):
            labels.append(
                Label(
                    query=f"query_{i}",
                    document=d,
                    is_correct_document=True,
                    is_correct_answer=False,
                    # create a mix set of labels
                    origin="user-feedback" if i % 2 else "gold-label",
                    answer=None if not i else Answer(f"the answer is {i}", document_ids=[d.id]),
                    meta={"name": f"label_{i}", "year": f"{2020 + i}"},
                )
            )
        return labels

    #
    # Integration tests
    #

    @pytest.mark.integration
    def test_write_documents(self, ds, documents):
        ds.write_documents(documents)
        docs = ds.get_all_documents()
        assert len(docs) == len(documents)
        expected_ids = {doc.id for doc in documents}
        ids = {doc.id for doc in docs}
        assert ids == expected_ids

    @pytest.mark.integration
    def test_write_labels(self, ds, labels):
        ds.write_labels(labels)
        assert ds.get_all_labels() == labels

    @pytest.mark.integration
    def test_write_with_duplicate_doc_ids(self, ds):
        """Writing docs with identical id_hash_keys must dedupe on "skip" and raise on "fail"."""
        duplicate_documents = [
            Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
            Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
        ]
        ds.write_documents(duplicate_documents, duplicate_documents="skip")
        results = ds.get_all_documents()
        assert len(results) == 1
        assert results[0] == duplicate_documents[0]
        # the concrete exception type varies across document stores, hence the broad check
        with pytest.raises(Exception):
            ds.write_documents(duplicate_documents, duplicate_documents="fail")

    @pytest.mark.integration
    def test_get_embedding_count(self, ds, documents):
        """
        We expect 6 docs with embeddings because only 6 documents in the documents fixture for this class contain
        embeddings.
        """
        ds.write_documents(documents)
        assert ds.get_embedding_count() == 6

    @pytest.mark.skip
    @pytest.mark.integration
    def test_get_all_documents_without_filters(self, ds, documents):
        ds.write_documents(documents)
        out = ds.get_all_documents()
        assert out == documents

    @pytest.mark.integration
    def test_get_all_documents_without_embeddings(self, ds, documents):
        ds.write_documents(documents)
        out = ds.get_all_documents(return_embedding=False)
        for doc in out:
            assert doc.embedding is None

    @pytest.mark.integration
    def test_get_all_document_filter_duplicate_text_value(self, ds):
        """Docs with identical content but different meta (hashed on meta) must be filterable individually."""
        documents = [
            Document(content="duplicated", meta={"meta_field": "0"}, id_hash_keys=["meta"]),
            Document(content="duplicated", meta={"meta_field": "1", "name": "file.txt"}, id_hash_keys=["meta"]),
            Document(content="Doc2", meta={"name": "file_2.txt"}, id_hash_keys=["meta"]),
        ]
        ds.write_documents(documents)
        documents = ds.get_all_documents(filters={"meta_field": ["1"]})
        assert len(documents) == 1
        assert documents[0].content == "duplicated"
        assert documents[0].meta["name"] == "file.txt"

        documents = ds.get_all_documents(filters={"meta_field": ["0"]})
        assert len(documents) == 1
        assert documents[0].content == "duplicated"
        assert documents[0].meta.get("name") is None

        documents = ds.get_all_documents(filters={"name": ["file_2.txt"]})
        assert len(documents) == 1
        assert documents[0].content == "Doc2"
        assert documents[0].meta.get("meta_field") is None

    @pytest.mark.integration
    def test_get_all_documents_with_correct_filters(self, ds, documents):
        ds.write_documents(documents)
        result = ds.get_all_documents(filters={"year": ["2020"]})
        assert len(result) == 3

        documents = ds.get_all_documents(filters={"year": ["2020", "2021"]})
        assert len(documents) == 6

    @pytest.mark.integration
    def test_get_all_documents_with_incorrect_filter_name(self, ds, documents):
        ds.write_documents(documents)
        result = ds.get_all_documents(filters={"non_existing_meta_field": ["whatever"]})
        assert len(result) == 0

    @pytest.mark.integration
    def test_get_all_documents_with_incorrect_filter_value(self, ds, documents):
        ds.write_documents(documents)
        result = ds.get_all_documents(filters={"year": ["nope"]})
        assert len(result) == 0

    @pytest.mark.integration
    def test_eq_filters(self, ds, documents):
        """Both the explicit `$eq` operator and its shorthand (plain value) must match."""
        ds.write_documents(documents)

        result = ds.get_all_documents(filters={"year": {"$eq": "2020"}})
        assert len(result) == 3
        result = ds.get_all_documents(filters={"year": "2020"})
        assert len(result) == 3

    @pytest.mark.integration
    def test_in_filters(self, ds, documents):
        """Both the explicit `$in` operator and its shorthand (a list) must match."""
        ds.write_documents(documents)

        result = ds.get_all_documents(filters={"year": {"$in": ["2020", "2021", "n.a."]}})
        assert len(result) == 6
        result = ds.get_all_documents(filters={"year": ["2020", "2021", "n.a."]})
        assert len(result) == 6

    @pytest.mark.integration
    def test_ne_filters(self, ds, documents):
        ds.write_documents(documents)

        result = ds.get_all_documents(filters={"year": {"$ne": "2020"}})
        assert len(result) == 6

    @pytest.mark.integration
    def test_nin_filters(self, ds, documents):
        ds.write_documents(documents)

        result = ds.get_all_documents(filters={"year": {"$nin": ["2020", "2021", "n.a."]}})
        assert len(result) == 3

    @pytest.mark.integration
    def test_comparison_filters(self, ds, documents):
        """`$gt`/`$gte`/`$lt`/`$lte` on the list-valued "numbers" meta field."""
        ds.write_documents(documents)

        result = ds.get_all_documents(filters={"numbers": {"$gt": 0.0}})
        assert len(result) == 3

        result = ds.get_all_documents(filters={"numbers": {"$gte": -2.0}})
        assert len(result) == 6

        result = ds.get_all_documents(filters={"numbers": {"$lt": 0.0}})
        assert len(result) == 3

        result = ds.get_all_documents(filters={"numbers": {"$lte": 2.0}})
        assert len(result) == 6

    @pytest.mark.integration
    def test_compound_filters(self, ds, documents):
        ds.write_documents(documents)

        result = ds.get_all_documents(filters={"year": {"$lte": "2021", "$gte": "2020"}})
        assert len(result) == 6

    @pytest.mark.integration
    def test_simplified_filters(self, ds, documents):
        """An explicit top-level `$and` and the implicit (simplified) form must be equivalent."""
        ds.write_documents(documents)

        filters = {"$and": {"year": {"$lte": "2021", "$gte": "2020"}, "name": {"$in": ["name_0", "name_1"]}}}
        result = ds.get_all_documents(filters=filters)
        assert len(result) == 4

        filters_simplified = {"year": {"$lte": "2021", "$gte": "2020"}, "name": ["name_0", "name_1"]}
        result = ds.get_all_documents(filters=filters_simplified)
        assert len(result) == 4

    @pytest.mark.integration
    def test_nested_condition_filters(self, ds, documents):
        ds.write_documents(documents)
        filters = {
            "$and": {
                "year": {"$lte": "2021", "$gte": "2020"},
                "$or": {"name": {"$in": ["name_0", "name_1"]}, "numbers": {"$lt": 5.0}},
            }
        }
        result = ds.get_all_documents(filters=filters)
        assert len(result) == 6

        filters_simplified = {
            "year": {"$lte": "2021", "$gte": "2020"},
            "$or": {"name": {"$in": ["name_0", "name_2"]}, "numbers": {"$lt": 5.0}},
        }
        result = ds.get_all_documents(filters=filters_simplified)
        assert len(result) == 6

        filters = {
            "$and": {
                "year": {"$lte": "2021", "$gte": "2020"},
                "$or": {
                    "name": {"$in": ["name_0", "name_1"]},
                    "$and": {"numbers": {"$lt": 5.0}, "$not": {"month": {"$eq": "01"}}},
                },
            }
        }
        result = ds.get_all_documents(filters=filters)
        assert len(result) == 5

        filters_simplified = {
            "year": {"$lte": "2021", "$gte": "2020"},
            "$or": {"name": ["name_0", "name_1"], "$and": {"numbers": {"$lt": 5.0}, "$not": {"month": {"$eq": "01"}}}},
        }
        result = ds.get_all_documents(filters=filters_simplified)
        assert len(result) == 5

    @pytest.mark.integration
    def test_nested_condition_not_filters(self, ds, documents):
        """
        Test nested logical operations within "$not", important as we apply De Morgan's laws in WeaviateDocumentstore
        """
        ds.write_documents(documents)
        filters = {
            "$not": {
                "$or": {
                    "$and": {"numbers": {"$lt": 5.0}, "month": {"$ne": "01"}},
                    "$not": {"year": {"$lte": "2021", "$gte": "2020"}},
                }
            }
        }
        result = ds.get_all_documents(filters=filters)
        assert len(result) == 3

        docs_meta = result[0].meta["numbers"]
        assert [2, 4] == docs_meta

        # Test same logical operator twice on same level

        filters = {
            "$or": [
                {"$and": {"name": {"$in": ["name_0", "name_1"]}, "year": {"$gte": "2020"}}},
                {"$and": {"name": {"$in": ["name_0", "name_1"]}, "year": {"$lt": "2021"}}},
            ]
        }
        result = ds.get_all_documents(filters=filters)
        docs_meta = [doc.meta["name"] for doc in result]
        assert len(result) == 4
        assert "name_0" in docs_meta
        assert "name_2" not in docs_meta

    @pytest.mark.integration
    def test_get_document_by_id(self, ds, documents):
        ds.write_documents(documents)
        doc = ds.get_document_by_id(documents[0].id)
        assert doc.id == documents[0].id
        assert doc.content == documents[0].content

    @pytest.mark.integration
    def test_get_documents_by_id(self, ds, documents):
        ds.write_documents(documents)
        ids = [doc.id for doc in documents]
        # batch_size=2 forces batched retrieval to be exercised
        result = {doc.id for doc in ds.get_documents_by_id(ids, batch_size=2)}
        assert set(ids) == result

    @pytest.mark.integration
    def test_get_document_count(self, ds, documents):
        ds.write_documents(documents)
        assert ds.get_document_count() == len(documents)
        assert ds.get_document_count(filters={"year": ["2020"]}) == 3
        assert ds.get_document_count(filters={"month": ["02"]}) == 3

    @pytest.mark.integration
    def test_get_all_documents_generator(self, ds, documents):
        ds.write_documents(documents)
        assert len(list(ds.get_all_documents_generator(batch_size=2))) == 9

    @pytest.mark.integration
    def test_duplicate_documents_skip(self, ds, documents):
        """With duplicate_documents="skip", re-writing existing ids must not change stored docs."""
        ds.write_documents(documents)

        updated_docs = []
        for d in documents:
            updated_d = Document.from_dict(d.to_dict())
            updated_d.meta["name"] = "Updated"
            updated_docs.append(updated_d)

        ds.write_documents(updated_docs, duplicate_documents="skip")
        for d in ds.get_all_documents():
            assert d.meta.get("name") != "Updated"

    @pytest.mark.integration
    def test_duplicate_documents_overwrite(self, ds, documents):
        """With duplicate_documents="overwrite", re-writing existing ids must replace stored docs."""
        ds.write_documents(documents)

        updated_docs = []
        for d in documents:
            updated_d = Document.from_dict(d.to_dict())
            updated_d.meta["name"] = "Updated"
            updated_docs.append(updated_d)

        ds.write_documents(updated_docs, duplicate_documents="overwrite")
        for doc in ds.get_all_documents():
            assert doc.meta["name"] == "Updated"

    @pytest.mark.integration
    def test_duplicate_documents_fail(self, ds, documents):
        ds.write_documents(documents)

        updated_docs = []
        for d in documents:
            updated_d = Document.from_dict(d.to_dict())
            updated_d.meta["name"] = "Updated"
            updated_docs.append(updated_d)

        with pytest.raises(DuplicateDocumentError):
            ds.write_documents(updated_docs, duplicate_documents="fail")

    @pytest.mark.integration
    def test_write_document_meta(self, ds):
        """Meta must round-trip whether docs are written as dicts or Document objects."""
        ds.write_documents(
            [
                {"content": "dict_without_meta", "id": "1"},
                {"content": "dict_with_meta", "meta_field": "test2", "id": "2"},
                Document(content="document_object_without_meta", id="3"),
                Document(content="document_object_with_meta", meta={"meta_field": "test4"}, id="4"),
            ]
        )
        assert not ds.get_document_by_id("1").meta
        assert ds.get_document_by_id("2").meta["meta_field"] == "test2"
        assert not ds.get_document_by_id("3").meta
        assert ds.get_document_by_id("4").meta["meta_field"] == "test4"

    @pytest.mark.integration
    def test_delete_documents(self, ds, documents):
        ds.write_documents(documents)
        ds.delete_documents()
        assert ds.get_document_count() == 0

    @pytest.mark.integration
    def test_delete_documents_with_filters(self, ds, documents):
        ds.write_documents(documents)
        # deletes the 6 docs from years 2020/2021, leaving the 3 without a "year" meta field
        ds.delete_documents(filters={"year": ["2020", "2021"]})
        assert ds.get_document_count() == 3

    @pytest.mark.integration
    def test_delete_documents_by_id(self, ds, documents):
        ds.write_documents(documents)
        docs_to_delete = ds.get_all_documents(filters={"year": ["2020"]})
        ds.delete_documents(ids=[doc.id for doc in docs_to_delete])
        assert ds.get_document_count() == 6

    @pytest.mark.integration
    def test_delete_documents_by_id_with_filters(self, ds, documents):
        ds.write_documents(documents)
        docs_to_delete = ds.get_all_documents(filters={"year": ["2020"]})
        # this should delete only 1 document out of the 3 ids passed
        ds.delete_documents(ids=[doc.id for doc in docs_to_delete], filters={"name": ["name_0"]})
        assert ds.get_document_count() == 8

    @pytest.mark.integration
    def test_write_get_all_labels(self, ds, labels):
        ds.write_labels(labels)
        ds.write_labels(labels[:3], index="custom_index")
        assert len(ds.get_all_labels()) == 9
        assert len(ds.get_all_labels(index="custom_index")) == 3
        # remove the index we created in this test
        ds.delete_index("custom_index")

    @pytest.mark.integration
    def test_delete_labels(self, ds, labels):
        ds.write_labels(labels)
        ds.write_labels(labels[:3], index="custom_index")
        ds.delete_labels()
        ds.delete_labels(index="custom_index")
        assert len(ds.get_all_labels()) == 0
        assert len(ds.get_all_labels(index="custom_index")) == 0
        # remove the index we created in this test
        ds.delete_index("custom_index")

    @pytest.mark.integration
    def test_write_labels_duplicate(self, ds, labels):
        # create a duplicate
        dupe = Label.from_dict(labels[0].to_dict())

        ds.write_labels(labels + [dupe])

        # ensure the duplicate was discarded
        assert len(ds.get_all_labels()) == len(labels)

    @pytest.mark.integration
    def test_delete_labels_by_id(self, ds, labels):
        ds.write_labels(labels)
        ds.delete_labels(ids=[labels[0].id])
        assert len(ds.get_all_labels()) == len(labels) - 1

    @pytest.mark.integration
    def test_delete_labels_by_filter(self, ds, labels):
        ds.write_labels(labels)
        ds.delete_labels(filters={"query": "query_1"})
        assert len(ds.get_all_labels()) == len(labels) - 1

    @pytest.mark.integration
    def test_delete_labels_by_filter_id(self, ds, labels):
        ds.write_labels(labels)

        # ids and filters are ANDed, the following should have no effect
        ds.delete_labels(ids=[labels[0].id], filters={"query": "query_9"})
        assert len(ds.get_all_labels()) == len(labels)

        # matching id AND filter deletes exactly that one label
        ds.delete_labels(ids=[labels[0].id], filters={"query": "query_0"})
        assert len(ds.get_all_labels()) == len(labels) - 1

    @pytest.mark.integration
    def test_get_label_count(self, ds, labels):
        ds.write_labels(labels)
        assert ds.get_label_count() == len(labels)

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        ds.write_documents(documents, index="custom_index")
        assert ds.get_document_count(index="custom_index") == len(documents)
        ds.delete_index(index="custom_index")
        # querying a deleted index must fail; the exact exception type is store-specific
        with pytest.raises(Exception):
            ds.get_document_count(index="custom_index")

    @pytest.mark.integration
    def test_delete_index_does_not_raise_if_not_exists(self, ds):
        ds.delete_index(index="unknown_index")

    @pytest.mark.integration
    def test_update_meta(self, ds, documents):
        ds.write_documents(documents)
        doc = documents[0]
        ds.update_document_meta(doc.id, meta={"year": "2099", "month": "12"})
        doc = ds.get_document_by_id(doc.id)
        assert doc.meta["year"] == "2099"
        assert doc.meta["month"] == "12"

    @pytest.mark.integration
    def test_labels_with_long_texts(self, ds, documents):
        """Labels whose answer context and document exceed typical field limits must round-trip intact."""
        label = Label(
            query="question1",
            answer=Answer(
                answer="answer",
                type="extractive",
                score=0.0,
                context="something " * 10_000,
                offsets_in_document=[Span(start=12, end=14)],
                offsets_in_context=[Span(start=12, end=14)],
            ),
            is_correct_answer=True,
            is_correct_document=True,
            document=Document(content="something " * 10_000, id="123"),
            origin="gold-label",
        )
        ds.write_labels(labels=[label])
        labels = ds.get_all_labels()
        assert len(labels) == 1
        assert label == labels[0]

    @pytest.mark.integration
    @pytest.mark.skipif(sys.platform == "win32", reason="_get_documents_meta() fails with 'too many SQL variables'")
    def test_get_all_documents_large_quantities(self, ds):
        # Test to exclude situations like Weaviate not returning more than 100 docs by default
        #   https://github.com/deepset-ai/haystack/issues/1893
        docs_to_write = [
            {"meta": {"name": f"name_{i}"}, "content": f"text_{i}", "embedding": np.random.rand(768).astype(np.float32)}
            for i in range(1000)
        ]
        ds.write_documents(docs_to_write)
        documents = ds.get_all_documents()
        assert all(isinstance(d, Document) for d in documents)
        assert len(documents) == len(docs_to_write)

    @pytest.mark.integration
    def test_custom_embedding_field(self, ds):
        ds.embedding_field = "custom_embedding_field"
        doc_to_write = {"content": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
        ds.write_documents([doc_to_write])
        documents = ds.get_all_documents(return_embedding=True)
        assert len(documents) == 1
        assert documents[0].content == "test"
        # Some document stores normalize the embedding on save, let's just compare the length
        assert doc_to_write["custom_embedding_field"].shape == documents[0].embedding.shape

    @pytest.mark.skip(reason="This currently fails for Weaviate and Pinecone")
    @pytest.mark.integration
    @pytest.mark.parametrize("batch_size", [None, 20])
    def test_add_eval_data(self, ds, batch_size, samples_path):
        # add eval data (SQUAD format)
        ds.add_eval_data(
            filename=samples_path / "squad" / "small.json",
            doc_index=ds.index,
            label_index=ds.label_index,
            batch_size=batch_size,
        )
        assert ds.get_document_count() == 87
        assert ds.get_label_count() == 1214

    #
    # Unit tests
    #

    @pytest.mark.unit
    def test_normalize_embeddings_diff_shapes(self):
        """normalize_embedding must produce unit-norm vectors in place, for both 1-D and 2-D inputs."""
        VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32")
        BaseDocumentStore.normalize_embedding(VEC_1)
        # abs() is required: `norm - 1 < 0.01` would pass for any norm below 1.01,
        # including a vector that was never normalized at all
        assert abs(np.linalg.norm(VEC_1) - 1) < 0.01

        VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32").reshape(1, -1)
        BaseDocumentStore.normalize_embedding(VEC_1)
        assert abs(np.linalg.norm(VEC_1) - 1) < 0.01
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc