
deepset-ai / haystack, build 14404479415 (Coveralls coverage report)

11 Apr 2025 01:28PM UTC · coverage: 90.316% (-0.01%) from 90.329%
Pull Request #9219: chore: LLM Evaluators - remove deprecated parameters
Merge a68bbd1de into 8bf41a851 (committed via GitHub by web-flow)

10660 of 11803 relevant lines covered (90.32%), 0.9 hits per line

Source File: haystack/components/evaluators/llm_evaluator.py (94.78% covered)
Uncovered lines in this file: the raise-on-failure error path in run(), the invalid-JSON fallback branch and the metadata append in run(), and the warning path in is_valid_json_and_has_expected_keys().
````python
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json
from typing import Any, Dict, List, Optional, Tuple, Type

from tqdm import tqdm

from haystack import component, default_from_dict, default_to_dict, logging
from haystack.components.builders import PromptBuilder
from haystack.components.generators.chat.openai import OpenAIChatGenerator
from haystack.components.generators.chat.types import ChatGenerator
from haystack.core.serialization import component_to_dict
from haystack.dataclasses.chat_message import ChatMessage
from haystack.utils import deserialize_chatgenerator_inplace, deserialize_type, serialize_type

logger = logging.getLogger(__name__)


@component
class LLMEvaluator:
    """
    Uses an LLM to evaluate inputs based on a prompt containing instructions and examples.

    The default API requires an OpenAI API key to be provided as an environment variable "OPENAI_API_KEY".
    The inputs are lists that are user-defined depending on the desired metric.
    The output is a dictionary with a key `results` containing a list of evaluation results.
    Each result is a dictionary with user-defined keys and values of either 0 for FALSE or 1 for TRUE respectively.

    Usage example:
    ```python
    from typing import List
    from haystack.components.evaluators import LLMEvaluator
    evaluator = LLMEvaluator(
        instructions="Is this answer problematic for children?",
        inputs=[("predicted_answers", List[str])],
        outputs=["score"],
        examples=[
            {"inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"}, "outputs": {"score": 1}},
            {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}},
        ],
    )
    predicted_answers = [
        "Football is the most popular sport with around 4 billion followers worldwide",
        "Python language was created by Guido van Rossum.",
    ]
    results = evaluator.run(predicted_answers=predicted_answers)
    print(results)
    # {'results': [{'score': 0}, {'score': 0}]}
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        instructions: str,
        inputs: List[Tuple[str, Type[List]]],
        outputs: List[str],
        examples: List[Dict[str, Any]],
        progress_bar: bool = True,
        *,
        raise_on_failure: bool = True,
        chat_generator: Optional[ChatGenerator] = None,
    ):
        """
        Creates an instance of LLMEvaluator.

        If no LLM is specified using the `chat_generator` parameter, the component will use OpenAI in JSON mode.

        :param instructions:
            The prompt instructions to use for evaluation.
            Should be a question about the inputs that can be answered with yes or no.
        :param inputs:
            The inputs that the component expects as incoming connections and that it evaluates.
            Each input is a tuple of an input name and input type. Input types must be lists.
        :param outputs:
            Output names of the evaluation results. They correspond to keys in the output dictionary.
        :param examples:
            Few-shot examples conforming to the expected input and output format as defined in the `inputs` and
            `outputs` parameters.
            Each example is a dictionary with keys "inputs" and "outputs"
            They contain the input and output as dictionaries respectively.
        :param raise_on_failure:
            If True, the component will raise an exception on an unsuccessful API call.
        :param progress_bar:
            Whether to show a progress bar during the evaluation.
        :param chat_generator:
            a ChatGenerator instance which represents the LLM.
            In order for the component to work, the LLM should be configured to return a JSON object. For example,
            when using the OpenAIChatGenerator, you should pass `{"response_format": {"type": "json_object"}}` in the
            `generation_kwargs`.
        """
        self.validate_init_parameters(inputs, outputs, examples)
        component.set_input_types(self, **dict(inputs))

        self.raise_on_failure = raise_on_failure
        self.instructions = instructions
        self.inputs = inputs
        self.outputs = outputs
        self.examples = examples
        self.progress_bar = progress_bar

        template = self.prepare_template()
        self.builder = PromptBuilder(template=template)

        if chat_generator is not None:
            self._chat_generator = chat_generator
        else:
            generation_kwargs = {"response_format": {"type": "json_object"}, "seed": 42}
            self._chat_generator = OpenAIChatGenerator(generation_kwargs=generation_kwargs)

    @staticmethod
    def validate_init_parameters(
        inputs: List[Tuple[str, Type[List]]], outputs: List[str], examples: List[Dict[str, Any]]
    ):
        """
        Validate the init parameters.

        :param inputs:
            The inputs to validate.
        :param outputs:
            The outputs to validate.
        :param examples:
            The examples to validate.

        :raises ValueError:
            If the inputs are not a list of tuples with a string and a type of list.
            If the outputs are not a list of strings.
            If the examples are not a list of dictionaries.
            If any example does not have keys "inputs" and "outputs" with values that are dictionaries with string keys.
        """
        # Validate inputs
        if (
            not isinstance(inputs, list)
            or not all(isinstance(_input, tuple) for _input in inputs)
            or not all(isinstance(_input[0], str) and _input[1] is not list and len(_input) == 2 for _input in inputs)
        ):
            msg = (
                f"LLM evaluator expects inputs to be a list of tuples. Each tuple must contain an input name and "
                f"type of list but received {inputs}."
            )
            raise ValueError(msg)

        # Validate outputs
        if not isinstance(outputs, list) or not all(isinstance(output, str) for output in outputs):
            msg = f"LLM evaluator expects outputs to be a list of str but received {outputs}."
            raise ValueError(msg)

        # Validate examples are lists of dicts
        if not isinstance(examples, list) or not all(isinstance(example, dict) for example in examples):
            msg = f"LLM evaluator expects examples to be a list of dictionaries but received {examples}."
            raise ValueError(msg)

        # Validate each example
        for example in examples:
            if (
                {"inputs", "outputs"} != example.keys()
                or not all(isinstance(example[param], dict) for param in ["inputs", "outputs"])
                or not all(isinstance(key, str) for param in ["inputs", "outputs"] for key in example[param])
            ):
                msg = (
                    f"LLM evaluator expects each example to have keys `inputs` and `outputs` with values that are "
                    f"dictionaries with str keys but received {example}."
                )
                raise ValueError(msg)

    @component.output_types(results=List[Dict[str, Any]])
    def run(self, **inputs) -> Dict[str, Any]:
        """
        Run the LLM evaluator.

        :param inputs:
            The input values to evaluate. The keys are the input names and the values are lists of input values.
        :returns:
            A dictionary with a `results` entry that contains a list of results.
            Each result is a dictionary containing the keys as defined in the `outputs` parameter of the LLMEvaluator
            and the evaluation results as the values. If an exception occurs for a particular input value, the result
            will be `None` for that entry.
            If the API is "openai" and the response contains a "meta" key, the metadata from OpenAI will be included
            in the output dictionary, under the key "meta".
        :raises ValueError:
            Only in the case that `raise_on_failure` is set to True and the received inputs are not lists or have
            different lengths, or if the output is not a valid JSON or doesn't contain the expected keys.
        """
        self.validate_input_parameters(dict(self.inputs), inputs)

        # inputs is a dictionary with keys being input names and values being a list of input values
        # We need to iterate through the lists in parallel for all keys of the dictionary
        input_names, values = inputs.keys(), list(zip(*inputs.values()))
        list_of_input_names_to_values = [dict(zip(input_names, v)) for v in values]

        results: List[Optional[Dict[str, Any]]] = []
        metadata = []
        errors = 0
        for input_names_to_values in tqdm(list_of_input_names_to_values, disable=not self.progress_bar):
            prompt = self.builder.run(**input_names_to_values)
            messages = [ChatMessage.from_user(prompt["prompt"])]
            try:
                result = self._chat_generator.run(messages=messages)
            except Exception as e:
                if self.raise_on_failure:
                    raise ValueError(f"Error while generating response for prompt: {prompt}. Error: {e}")
                logger.warning("Error while generating response for prompt: {prompt}. Error: {e}", prompt=prompt, e=e)
                results.append(None)
                errors += 1
                continue

            if self.is_valid_json_and_has_expected_keys(expected=self.outputs, received=result["replies"][0].text):
                parsed_result = json.loads(result["replies"][0].text)
                results.append(parsed_result)
            else:
                results.append(None)
                errors += 1

            if result["replies"][0].meta:
                metadata.append(result["replies"][0].meta)

        if errors > 0:
            logger.warning(
                "LLM evaluator failed for {errors} out of {len(list_of_input_names_to_values)} inputs.",
                errors=errors,
                len=len(list_of_input_names_to_values),
            )

        return {"results": results, "meta": metadata or None}

    def prepare_template(self) -> str:
        """
        Prepare the prompt template.

        Combine instructions, inputs, outputs, and examples into one prompt template with the following format:
        Instructions:
        <instructions>

        Generate the response in JSON format with the following keys:
        <list of output keys>
        Consider the instructions and the examples below to determine those values.

        Examples:
        <examples>

        Inputs:
        <inputs>
        Outputs:

        :returns:
            The prompt template.
        """
        inputs_section = (
            "{" + ", ".join([f'"{input_socket[0]}": {{{{ {input_socket[0]} }}}}' for input_socket in self.inputs]) + "}"
        )

        examples_section = "\n".join(
            [
                "Inputs:\n" + json.dumps(example["inputs"]) + "\nOutputs:\n" + json.dumps(example["outputs"])
                for example in self.examples
            ]
        )
        return (
            f"Instructions:\n"
            f"{self.instructions}\n\n"
            f"Generate the response in JSON format with the following keys:\n"
            f"{json.dumps(self.outputs)}\n"
            f"Consider the instructions and the examples below to determine those values.\n\n"
            f"Examples:\n"
            f"{examples_section}\n\n"
            f"Inputs:\n"
            f"{inputs_section}\n"
            f"Outputs:\n"
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            The serialized component as a dictionary.
        """
        # Since we cannot currently serialize tuples, convert the inputs to a list.
        inputs = [[name, serialize_type(type_)] for name, type_ in self.inputs]
        return default_to_dict(
            self,
            instructions=self.instructions,
            inputs=inputs,
            outputs=self.outputs,
            examples=self.examples,
            chat_generator=component_to_dict(obj=self._chat_generator, name="chat_generator"),
            progress_bar=self.progress_bar,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "LLMEvaluator":
        """
        Deserialize this component from a dictionary.

        :param data:
            The dictionary representation of this component.
        :returns:
            The deserialized component instance.
        """
        data["init_parameters"]["inputs"] = [
            (name, deserialize_type(type_)) for name, type_ in data["init_parameters"]["inputs"]
        ]

        if data["init_parameters"].get("chat_generator"):
            deserialize_chatgenerator_inplace(data["init_parameters"], key="chat_generator")

        return default_from_dict(cls, data)

    @staticmethod
    def validate_input_parameters(expected: Dict[str, Any], received: Dict[str, Any]) -> None:
        """
        Validate the input parameters.

        :param expected:
            The expected input parameters.
        :param received:
            The received input parameters.

        :raises ValueError:
            If not all expected inputs are present in the received inputs
            If the received inputs are not lists or have different lengths
        """
        # Validate that all expected inputs are present in the received inputs
        for param in expected.keys():
            if param not in received:
                msg = f"LLM evaluator expected input parameter '{param}' but received only {received.keys()}."
                raise ValueError(msg)

        # Validate that all received inputs are lists
        if not all(isinstance(_input, list) for _input in received.values()):
            msg = (
                "LLM evaluator expects all input values to be lists but received "
                f"{[type(_input) for _input in received.values()]}."
            )
            raise ValueError(msg)

        # Validate that all received inputs are of the same length
        inputs = received.values()
        length = len(next(iter(inputs)))
        if not all(len(_input) == length for _input in inputs):
            msg = (
                f"LLM evaluator expects all input lists to have the same length but received {inputs} with lengths "
                f"{[len(_input) for _input in inputs]}."
            )
            raise ValueError(msg)

    def is_valid_json_and_has_expected_keys(self, expected: List[str], received: str) -> bool:
        """
        Output must be a valid JSON with the expected keys.

        :param expected:
            Names of expected outputs
        :param received:
            Names of received outputs

        :raises ValueError:
            If the output is not a valid JSON with the expected keys:
            - with `raise_on_failure` set to True a ValueError is raised.
            - with `raise_on_failure` set to False a warning is issued and False is returned.

        :returns:
            True if the received output is a valid JSON with the expected keys, False otherwise.
        """
        try:
            parsed_output = json.loads(received)
        except json.JSONDecodeError:
            msg = "Response from LLM evaluator is not a valid JSON."
            if self.raise_on_failure:
                raise ValueError(msg)
            logger.warning(msg)
            return False

        if not all(output in parsed_output for output in expected):
            if self.raise_on_failure:
                raise ValueError(
                    f"Expected response from LLM evaluator to be JSON with keys {expected}, got {{received}}."
                )
            logger.warning(
                "Expected response from LLM evaluator to be JSON with keys {expected}, got {received}.",
                expected=expected,
                received=received,
            )
            return False

        return True
````
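Since `prepare_template()` builds the prompt deterministically from the init parameters, the template for the usage example in the class docstring can be written out directly. A sketch of the resulting Jinja template, with `{{ predicted_answers }}` left as the placeholder that `PromptBuilder` fills for each row of inputs:

```python
# Sketch: the template prepare_template() produces for the class docstring's example
# (instructions="Is this answer problematic for children?",
#  inputs=[("predicted_answers", List[str])], outputs=["score"]).
expected_template = """Instructions:
Is this answer problematic for children?

Generate the response in JSON format with the following keys:
["score"]
Consider the instructions and the examples below to determine those values.

Examples:
Inputs:
{"predicted_answers": "Damn, this is straight outta hell!!!"}
Outputs:
{"score": 1}
Inputs:
{"predicted_answers": "Football is the most popular sport."}
Outputs:
{"score": 0}

Inputs:
{"predicted_answers": {{ predicted_answers }}}
Outputs:
"""
```

The rendered prompt is sent as a single user `ChatMessage`, and the generator's JSON-mode reply is parsed back into the keys listed in `outputs`.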
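The `chat_generator` docstring above notes that any generator passed in must be configured to return a JSON object. A minimal sketch of wiring in a custom `OpenAIChatGenerator`; the model name, instructions, and example data here are illustrative, not taken from this report, and `OPENAI_API_KEY` is assumed to be set in the environment:

```python
from typing import List

from haystack.components.evaluators import LLMEvaluator
from haystack.components.generators.chat.openai import OpenAIChatGenerator

# Illustrative model choice; the generation_kwargs follow the docstring's advice
# to force JSON-mode output so the evaluator can parse the reply.
chat_generator = OpenAIChatGenerator(
    model="gpt-4o-mini",
    generation_kwargs={"response_format": {"type": "json_object"}},
)

evaluator = LLMEvaluator(
    instructions="Does the predicted answer use only information from the context?",
    inputs=[("contexts", List[str]), ("predicted_answers", List[str])],
    outputs=["score"],
    examples=[
        {
            "inputs": {"contexts": "Paris is the capital of France.", "predicted_answers": "Paris"},
            "outputs": {"score": 1},
        },
        {
            "inputs": {"contexts": "Paris is the capital of France.", "predicted_answers": "Berlin"},
            "outputs": {"score": 0},
        },
    ],
    chat_generator=chat_generator,
)

results = evaluator.run(
    contexts=["Paris is the capital of France.", "Paris is the capital of France."],
    predicted_answers=["Paris", "Berlin"],
)
```

Because `component.set_input_types` registers one socket per entry in `inputs`, `run` takes one keyword list per declared input, and `validate_input_parameters` rejects calls where the lists differ in length.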
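Finally, a minimal sketch of the serialization round trip provided by `to_dict` and `from_dict`, continuing from the evaluator constructed above:

```python
# to_dict() converts each (name, type) input tuple into a [name, serialized_type] list
# and serializes the wrapped chat generator via component_to_dict.
data = evaluator.to_dict()

# from_dict() restores the input types with deserialize_type and rebuilds the
# chat generator in place before delegating to default_from_dict.
restored = LLMEvaluator.from_dict(data)
```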
