• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 10074443031

24 Jul 2024 09:51AM UTC coverage: 90.084% (-0.04%) from 90.122%
10074443031

Pull #7943

github

web-flow
Merge 3c2a91368 into 0c9dc008f
Pull Request #7943: feat: Multimodal ChatMessage

6995 of 7765 relevant lines covered (90.08%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.31
haystack/components/builders/answer_builder.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import re
1✔
6
from typing import Any, Dict, List, Optional, Union
1✔
7

8
from haystack import Document, GeneratedAnswer, component, logging
1✔
9
from haystack.dataclasses.chat_message import ChatMessage
1✔
10

11
logger = logging.getLogger(__name__)
1✔
12

13

14
@component
class AnswerBuilder:
    """
    Parses the replies returned by a Generator for a given query into GeneratedAnswer objects.

    Optionally, it also takes Documents and metadata from the Generator as inputs to enrich the GeneratedAnswer objects.

    Usage example:
    ```python
    from haystack.components.builders import AnswerBuilder

    builder = AnswerBuilder(pattern="Answer: (.*)")
    builder.run(query="What's the answer?", replies=["This is an argument. Answer: This is the answer."])
    ```
    """

    def __init__(self, pattern: Optional[str] = None, reference_pattern: Optional[str] = None):
        """
        Creates an instance of the AnswerBuilder component.

        :param pattern:
            The regular expression pattern to use to extract the answer text from the generator output.
            If not specified, the whole string is used as the answer. The regular expression can have at
            most one capture group. If a capture group is present, the text matched by the capture group
            is used as the answer. If no capture group is present, the whole match is used as the answer.
            Examples:
                `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer".
                `Answer: (.*)` finds "this is an answer" in a string "this is an argument. Answer: this is an answer".

        :param reference_pattern:
            The regular expression pattern to use for parsing the document references.
            We assume that references are specified as indices of the input documents and that
            indices start at 1.
            Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]".
            If not specified, no parsing is done, and all documents are referenced.

        :raises ValueError:
            If `pattern` contains more than one capture group.
        """
        if pattern:
            AnswerBuilder._check_num_groups_in_regex(pattern)

        self.pattern = pattern
        self.reference_pattern = reference_pattern

    @component.output_types(answers=List[GeneratedAnswer])
    def run(
        self,
        query: str,
        replies: Union[List[str], List[ChatMessage]],
        meta: Optional[List[Dict[str, Any]]] = None,
        documents: Optional[List[Document]] = None,
        pattern: Optional[str] = None,
        reference_pattern: Optional[str] = None,
    ):
        """
        Turns the output of a Generator into `GeneratedAnswer` objects using regular expressions.

        :param query:
            The query used in the prompts for the Generator.
        :param replies:
            The output of the Generator. Can be a list of strings or a list of ChatMessage objects.
        :param meta:
            The metadata returned by the Generator. If not specified, the generated answer will contain no metadata.
            For replies that are ChatMessage objects, the message's own `meta` is used instead of the
            corresponding entry of this list.
        :param documents:
            The documents used as input to the Generator. If `documents` are specified, they are added to the
            `GeneratedAnswer` objects. If both `documents` and `reference_pattern` are specified, the documents
            referenced in the Generator output are extracted from the input documents and added to the
            `GeneratedAnswer` objects.
        :param pattern:
            The regular expression pattern to use to extract the answer text from the generator output.
            If not specified, the pattern given at initialization is used; if that is not set either, the whole
            string is used as the answer. The regular expression can have at
            most one capture group. If a capture group is present, the text matched by the capture group
            is used as the answer. If no capture group is present, the whole match is used as the answer.
                Examples:
                    `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer".
                    `Answer: (.*)` finds "this is an answer" in a string
                    "this is an argument. Answer: this is an answer".
        :param reference_pattern:
            The regular expression pattern to use for parsing the document references.
            We assume that references are specified as indices of the input documents and that indices start at 1.
            Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]".
            If not specified, the pattern given at initialization is used; if that is not set either, no parsing
            is done, and all documents are referenced.

        :returns: A dictionary with the following keys:
            - `answers`: The answers obtained from the output of the generator

        :raises ValueError:
            If `meta` is given and its length differs from the number of replies, or if `pattern` contains
            more than one capture group.
        """
        if not meta:
            # Build one independent dict per reply. `[{}] * n` would repeat a single dict object, so every
            # answer would end up sharing (and mutating) the same metadata.
            meta = [{} for _ in replies]
        elif len(replies) != len(meta):
            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.")

        if pattern:
            AnswerBuilder._check_num_groups_in_regex(pattern)

        # Run-time arguments take precedence over the values configured at initialization.
        pattern = pattern or self.pattern
        reference_pattern = reference_pattern or self.reference_pattern
        all_answers = []
        for reply, metadata in zip(replies, meta):
            # Extract content and metadata from ChatMessage objects, else use the string and the given metadata
            if isinstance(reply, ChatMessage):
                extracted_reply: str = reply.content  # type: ignore
                source_metadata = reply.meta
            else:
                extracted_reply = reply
                source_metadata = metadata

            # Work on a shallow copy so that dropping the internal key below does not modify the
            # caller's metadata dict or the ChatMessage that was passed in.
            extracted_metadata = dict(source_metadata)
            extracted_metadata.pop("__haystack_content_type__", None)  # Remove ChatMessage internal metadata key

            referenced_docs = []
            if documents:
                if reference_pattern:
                    reference_idxs = AnswerBuilder._extract_reference_idxs(extracted_reply, reference_pattern)
                else:
                    # Without a reference pattern every input document counts as referenced.
                    reference_idxs = list(range(len(documents)))

                for idx in reference_idxs:
                    # Explicit bounds check: a reference such as "[0]" maps to index -1, which plain list
                    # indexing would silently resolve to the last document instead of raising IndexError.
                    if 0 <= idx < len(documents):
                        referenced_docs.append(documents[idx])
                    else:
                        logger.warning(
                            "Document index '{index}' referenced in Generator output is out of range. ", index=idx + 1
                        )

            answer_string = AnswerBuilder._extract_answer_string(extracted_reply, pattern)
            answer = GeneratedAnswer(
                data=answer_string, query=query, documents=referenced_docs, meta=extracted_metadata
            )
            all_answers.append(answer)

        return {"answers": all_answers}

    @staticmethod
    def _extract_answer_string(reply: str, pattern: Optional[str] = None) -> str:
        """
        Extract the answer string from the generator output using the specified pattern.

        If no pattern is specified, the whole string is used as the answer.

        :param reply:
            The output of the Generator. A string.
        :param pattern:
            The regular expression pattern to use to extract the answer text from the generator output.
        :returns:
            The text of the first capture group if the match has one, otherwise the whole match.
            An empty string if the pattern does not match the reply.
        """
        if pattern is None:
            return reply

        if match := re.search(pattern, reply):
            # No capture group in pattern -> use the whole match as answer
            if not match.lastindex:
                return match.group(0)
            # One capture group in pattern -> use the capture group as answer
            return match.group(1)
        return ""

    @staticmethod
    def _extract_reference_idxs(reply: str, reference_pattern: str) -> List[int]:
        """
        Find all document references in the generator output and convert them to list indices.

        :param reply:
            The output of the Generator. A string.
        :param reference_pattern:
            The regular expression pattern whose matches are the referenced document numbers.
        :returns:
            The matched numbers converted to integers and shifted by -1, in order of appearance.
            The values are not checked against the number of documents here.
        """
        document_idxs = re.findall(reference_pattern, reply)
        return [int(idx) - 1 for idx in document_idxs]

    @staticmethod
    def _check_num_groups_in_regex(pattern: str) -> None:
        """
        Validate that the pattern has at most one capture group.

        :param pattern:
            The regular expression pattern to check.
        :raises ValueError:
            If the pattern contains more than one capture group.
        """
        num_groups = re.compile(pattern).groups
        if num_groups > 1:
            raise ValueError(
                f"Pattern '{pattern}' contains multiple capture groups. "
                f"Please specify a pattern with at most one capture group."
            )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc