• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 10384533702

14 Aug 2024 08:56AM UTC coverage: 90.08% (-0.04%) from 90.122%
10384533702

push

github

web-flow
fix: deserialize Document Stores using specific `from_dict` class methods (#8207)

* use from_dict

* unused import

* improve logic

* improve reno

6965 of 7732 relevant lines covered (90.08%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.44
haystack/components/caching/cache_checker.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any, Dict, List
1✔
6

7
from haystack import DeserializationError, Document, component, default_from_dict, default_to_dict, logging
1✔
8
from haystack.core.serialization import import_class_by_name
1✔
9
from haystack.document_stores.types import DocumentStore
1✔
10

11
logger = logging.getLogger(__name__)
1✔
12

13

14
@component
class CacheChecker:
    """
    Checks for the presence of documents in a Document Store based on a specified field in each document's metadata.

    If matching documents are found, they are returned as hits. If not, the items
    are returned as misses, indicating they are not in the cache.

    Usage example:
    ```python
    from haystack import Document
    from haystack.document_stores.in_memory import InMemoryDocumentStore
    from haystack.components.caching.cache_checker import CacheChecker

    docstore = InMemoryDocumentStore()
    documents = [
        Document(content="doc1", meta={"url": "https://example.com/1"}),
        Document(content="doc2", meta={"url": "https://example.com/2"}),
        Document(content="doc3", meta={"url": "https://example.com/1"}),
        Document(content="doc4", meta={"url": "https://example.com/2"}),
    ]
    docstore.write_documents(documents)
    checker = CacheChecker(docstore, cache_field="url")
    results = checker.run(items=["https://example.com/1", "https://example.com/5"])
    assert results == {"hits": [documents[0], documents[2]], "misses": ["https://example.com/5"]}
    ```
    """

    def __init__(self, document_store: DocumentStore, cache_field: str):
        """
        Create a CacheChecker component.

        :param document_store:
            Document store to check.
        :param cache_field:
            Name of the Document metadata field
            to check for cache hits.
        """
        self.document_store = document_store
        self.cache_field = cache_field

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        The Document Store is serialized through its own `to_dict` method and nested
        under the `document_store` init parameter.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(self, document_store=self.document_store.to_dict(), cache_field=self.cache_field)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CacheChecker":
        """
        Deserializes the component from a dictionary.

        The `data` dictionary and its `init_parameters` mapping are not modified, so the same
        serialized dictionary can be deserialized more than once.

        :param data:
            Dictionary to deserialize from.
        :returns:
            Deserialized component.
        :raises DeserializationError:
            If `document_store` is missing from the init parameters, if the serialized
            Document Store has no `type`, or if the Document Store class cannot be imported.
        """
        init_params = data.get("init_parameters", {})
        if "document_store" not in init_params:
            raise DeserializationError("Missing 'document_store' in serialization data")

        doc_store_data = init_params["document_store"]
        if "type" not in doc_store_data:
            raise DeserializationError("Missing 'type' in document store's serialization data")

        try:
            doc_store_class = import_class_by_name(doc_store_data["type"])
        except ImportError as e:
            raise DeserializationError(f"Class '{doc_store_data['type']}' not correctly imported") from e

        # Prefer the store's own `from_dict` so store-specific deserialization logic runs;
        # fall back to the generic helper for classes that do not define one.
        if hasattr(doc_store_class, "from_dict"):
            document_store = doc_store_class.from_dict(doc_store_data)
        else:
            document_store = default_from_dict(doc_store_class, doc_store_data)

        # Build fresh dictionaries instead of overwriting the caller's entry: replacing
        # `data["init_parameters"]["document_store"]` with a live instance would corrupt the
        # caller's serialized data and break any later `from_dict` call on the same dictionary.
        resolved_data = {**data, "init_parameters": {**init_params, "document_store": document_store}}
        return default_from_dict(cls, resolved_data)

    @component.output_types(hits=List[Document], misses=List)
    def run(self, items: List[Any]):
        """
        Checks if any document associated with the specified cache field is already present in the store.

        The Document Store is queried once per item, using an equality filter on the cache field.

        :param items:
            Values to be checked against the cache field.
        :returns:
            A dictionary with two keys:
            - `hits` - Documents that matched with any of the items.
            - `misses` - Items that were not present in any documents.
        """
        found_documents = []
        misses = []

        for item in items:
            filters = {"field": self.cache_field, "operator": "==", "value": item}
            found = self.document_store.filter_documents(filters=filters)
            if found:
                found_documents.extend(found)
            else:
                # No document carries this value in the cache field.
                misses.append(item)
        return {"hits": found_documents, "misses": misses}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc