9568249476

Committed 18 Jun 2024 03:52PM UTC coverage: 89.872% (-0.1%) from 89.995%

Build # 9568249476

Build Type

push

github

Committed by

web-flow

Commit Message

ci: Add code formatting checks  (#7882)

* ruff settings

enable ruff format and re-format outdated files

feat: `EvaluationRunResult` add parameter to specify columns to keep in the comparative `Dataframe`  (#7879)

* adding param to explicitly state which cols to keep

* adding param to explicitly state which cols to keep

* adding param to explicitly state which cols to keep

* updating tests

* adding release notes

* Update haystack/evaluation/eval_run_result.py

Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>

* Update releasenotes/notes/add-keep-columns-to-EvalRunResult-comparative-be3e15ce45de3e0b.yaml

Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>

* updating docstring

---------

Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>

add format-check

fail on format and linting failures

fix string formatting

reformat long lines

fix tests

fix typing

linter

pull from main

* reformat

* lint -> check

* lint -> check

Run Details

6957 of 7741 relevant lines covered (89.87%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

haystack/components/validators/json_schema.py

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json
from typing import Any, Dict, List, Optional

from haystack import component
from haystack.dataclasses import ChatMessage
from haystack.lazy_imports import LazyImport

with LazyImport(message="Run 'pip install jsonschema'") as jsonschema_import:
    from jsonschema import ValidationError, validate


@component
class JsonSchemaValidator:
    """
    Validates JSON content of `ChatMessage` against a specified [JSON Schema](https://json-schema.org/).

    If JSON content of a message conforms to the provided schema, the message is passed along the "validated" output.
    If the JSON content does not conform to the schema, the message is passed along the "validation_error" output.
    In the latter case, the error message is constructed using the provided `error_template` or a default template.
    These error ChatMessages can be used by LLMs in Haystack 2.x recovery loops.

    Usage example:

    ```python
    from typing import List

    from haystack import Pipeline
    from haystack.components.generators.chat import OpenAIChatGenerator
    from haystack.components.others import Multiplexer
    from haystack.components.validators import JsonSchemaValidator
    from haystack import component
    from haystack.dataclasses import ChatMessage


    @component
    class MessageProducer:

        @component.output_types(messages=List[ChatMessage])
        def run(self, messages: List[ChatMessage]) -> dict:
            return {"messages": messages}


    p = Pipeline()
    p.add_component("llm", OpenAIChatGenerator(model="gpt-4-1106-preview",
                                               generation_kwargs={"response_format": {"type": "json_object"}}))
    p.add_component("schema_validator", JsonSchemaValidator())
    p.add_component("mx_for_llm", Multiplexer(List[ChatMessage]))
    p.add_component("message_producer", MessageProducer())

    p.connect("message_producer.messages", "mx_for_llm")
    p.connect("mx_for_llm", "llm")
    p.connect("llm.replies", "schema_validator.messages")
    p.connect("schema_validator.validation_error", "mx_for_llm")

    result = p.run(
        data={
            "message_producer": {
                "messages": [ChatMessage.from_user("Generate JSON for person with name 'John' and age 30")]
            },
            "schema_validator": {
                "json_schema": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
                }
            },
        }
    )
    print(result)
    >> {'schema_validator': {'validated': [ChatMessage(content='\\n{\\n  "name": "John",\\n  "age": 30\\n}',
    role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4-1106-preview', 'index': 0,
    'finish_reason': 'stop', 'usage': {'completion_tokens': 17, 'prompt_tokens': 20, 'total_tokens': 37}})]}}
    ```
    """

    # Default error description template. The placeholders are filled in by
    # `_construct_error_recovery_message` through `str.format`.
    default_error_template = (
        "The JSON content in the next message does not conform to the provided schema.\n"
        "Error details:\n- Message: {error_message}\n"
        "- Error Path in JSON: {error_path}\n"
        "- Schema Path: {error_schema_path}\n"
        "Please match the following schema:\n"
        "{json_schema}\n"
        "and provide the corrected JSON content ONLY."
    )

    def __init__(self, json_schema: Optional[Dict[str, Any]] = None, error_template: Optional[str] = None):
        """
        Initialize the JsonSchemaValidator component.

        :param json_schema: A dictionary representing the [JSON schema](https://json-schema.org/) against which
            the messages' content is validated.
        :param error_template: A custom template string for formatting the error message in case of validation failure.
        """
        jsonschema_import.check()
        self.json_schema = json_schema
        self.error_template = error_template

    @component.output_types(validated=List[ChatMessage], validation_error=List[ChatMessage])
    def run(
        self,
        messages: List[ChatMessage],
        json_schema: Optional[Dict[str, Any]] = None,
        error_template: Optional[str] = None,
    ) -> Dict[str, List[ChatMessage]]:
        """
        Validates the last of the provided messages against the specified json schema.

        If the message conforms to the schema, the messages are passed along the "validated" output. If it does not,
        the messages, preceded by a user message describing the error, are passed along the "validation_error"
        output.

        :param messages: A list of ChatMessage instances to be validated. The last message in this list is the one
            that is validated.
        :param json_schema: A dictionary representing the [JSON schema](https://json-schema.org/)
            against which the messages' content is validated. If not provided, the schema from the component init
            is used.
        :param error_template: A custom template string for formatting the error message in case of validation
            failure. If not provided, the `error_template` from the component init is used.
        :return:  A dictionary with the following keys:
            - "validated": A list of messages if the last message is valid.
            - "validation_error": A list of messages if the last message is invalid.
        :raises ValueError: If no JSON schema is provided or if the message content is not a dictionary or a list of
            dictionaries.
        :raises json.JSONDecodeError: If the content of the last message is not valid JSON.
        """
        json_schema = json_schema or self.json_schema
        error_template = error_template or self.error_template or self.default_error_template

        # Report a missing schema before touching the message content, so that this configuration error is not
        # masked by a JSON decoding error.
        if not json_schema:
            raise ValueError("Provide a JSON schema for validation either in the run method or in the component init.")

        last_message = messages[-1]
        last_message_content = json.loads(last_message.content)

        # fc payload is json object but subtree `parameters` is string - we need to convert to json object
        # we need complete json to validate it against schema
        last_message_json = self._recursive_json_to_object(last_message_content)
        using_openai_schema: bool = self._is_openai_function_calling_schema(json_schema)
        if using_openai_schema:
            # For a function calling schema only the `parameters` subtree is a JSON schema.
            validation_schema = json_schema["parameters"]
        else:
            validation_schema = json_schema
        try:
            # A single object is validated the same way as a list with one item.
            last_message_json = [last_message_json] if not isinstance(last_message_json, list) else last_message_json
            for content in last_message_json:
                if using_openai_schema:
                    validate(instance=content["function"]["arguments"], schema=validation_schema)
                else:
                    validate(instance=content, schema=validation_schema)

            return {"validated": messages}
        except ValidationError as e:
            error_path = " -> ".join(map(str, e.absolute_path)) if e.absolute_path else "N/A"
            error_schema_path = " -> ".join(map(str, e.absolute_schema_path)) if e.absolute_schema_path else "N/A"

            recovery_prompt = self._construct_error_recovery_message(
                error_template, str(e), error_path, error_schema_path, validation_schema
            )
            # The recovery prompt goes first, followed by the original messages.
            complete_message_list = [ChatMessage.from_user(recovery_prompt)] + messages
            return {"validation_error": complete_message_list}

    def _construct_error_recovery_message(
        self,
        error_template: str,
        error_message: str,
        error_path: str,
        error_schema_path: str,
        json_schema: Dict[str, Any],
    ) -> str:
        """
        Constructs an error recovery message using a specified template or the default one if none is provided.

        :param error_template: A custom template string for formatting the error message in case of validation failure.
        :param error_message: The error message returned by the JSON schema validator.
        :param error_path: The path in the JSON content where the error occurred.
        :param error_schema_path: The path in the JSON schema where the error occurred.
        :param json_schema: The JSON schema against which the content is validated.
        :return: The template with its placeholders replaced by the given values.
        """
        error_template = error_template or self.default_error_template

        return error_template.format(
            error_message=error_message,
            error_path=error_path,
            error_schema_path=error_schema_path,
            json_schema=json_schema,
        )

    def _is_openai_function_calling_schema(self, json_schema: Dict[str, Any]) -> bool:
        """
        Checks if the provided schema is a valid OpenAI function calling schema.

        The schema is treated as such when it has all of the keys "name", "description" and "parameters".

        :param json_schema: The JSON schema to check
        :return: `True` if the schema is a valid OpenAI function calling schema; otherwise, `False`.
        """
        return all(key in json_schema for key in ["name", "description", "parameters"])

    def _recursive_json_to_object(self, data: Any) -> Any:
        """
        Convert string values that encode JSON objects or arrays into the corresponding Python containers.

        Only the values of dictionaries are inspected. A string value is replaced when it decodes to a JSON object
        or array. A string that decodes to a JSON scalar (for example `"30"`, `"true"` or `"null"`) is kept as the
        original string, so that string fields are still validated as strings.

        Returns a new data structure. The passed-in dictionaries and lists are not mutated.

        :param data: The data structure to be traversed.
        :return: A new data structure with JSON strings converted to dictionaries or lists.
        :raises ValueError: If `data`, or an item of a traversed list, is neither a dictionary nor a list.
        """
        if isinstance(data, list):
            return [self._recursive_json_to_object(item) for item in data]

        if isinstance(data, dict):
            new_dict: Dict[str, Any] = {}
            for key, value in data.items():
                if isinstance(value, dict):
                    new_dict[key] = self._recursive_json_to_object(value)
                    continue

                if not isinstance(value, str):
                    new_dict[key] = value
                    continue

                try:
                    json_value = json.loads(value)
                except json.JSONDecodeError:
                    # Not JSON at all: an ordinary string.
                    new_dict[key] = value
                    continue

                if isinstance(json_value, (dict, list)):
                    new_dict[key] = self._recursive_json_to_object(json_value)
                else:
                    # The string merely looks like a JSON scalar. Keep the original string instead of coercing it
                    # to a number, boolean or None, which would change the type seen by the schema validation.
                    new_dict[key] = value
            return new_dict

        # Anything that is neither a list nor a dictionary cannot be validated as a JSON document here.
        raise ValueError("Input must be a dictionary or a list of dictionaries.")

1	# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2	#
3	# SPDX-License-Identifier: Apache-2.0
4
5	import json	×
6	from typing import Any, Dict, List, Optional	×
7
8	from haystack import component	×
9	from haystack.dataclasses import ChatMessage	×
10	from haystack.lazy_imports import LazyImport	×
11
12	with LazyImport(message="Run 'pip install jsonschema'") as jsonschema_import:	×
13	from jsonschema import ValidationError, validate	×
14
15
16	@component	×
17	class JsonSchemaValidator:	×
18	"""
19	Validates JSON content of `ChatMessage` against a specified [JSON Schema](https://json-schema.org/).
20
21	If JSON content of a message conforms to the provided schema, the message is passed along the "validated" output.
22	If the JSON content does not conform to the schema, the message is passed along the "validation_error" output.
23	In the latter case, the error message is constructed using the provided `error_template` or a default template.
24	These error ChatMessages can be used by LLMs in Haystack 2.x recovery loops.
25
26	Usage example:
27
28	```python
29	from typing import List
30
31	from haystack import Pipeline
32	from haystack.components.generators.chat import OpenAIChatGenerator
33	from haystack.components.others import Multiplexer
34	from haystack.components.validators import JsonSchemaValidator
35	from haystack import component
36	from haystack.dataclasses import ChatMessage
37
38
39	@component
40	class MessageProducer:
41
42	@component.output_types(messages=List[ChatMessage])
43	def run(self, messages: List[ChatMessage]) -> dict:
44	return {"messages": messages}
45
46
47	p = Pipeline()
48	p.add_component("llm", OpenAIChatGenerator(model="gpt-4-1106-preview",
49	generation_kwargs={"response_format": {"type": "json_object"}}))
50	p.add_component("schema_validator", JsonSchemaValidator())
51	p.add_component("mx_for_llm", Multiplexer(List[ChatMessage]))
52	p.add_component("message_producer", MessageProducer())
53
54	p.connect("message_producer.messages", "mx_for_llm")
55	p.connect("mx_for_llm", "llm")
56	p.connect("llm.replies", "schema_validator.messages")
57	p.connect("schema_validator.validation_error", "mx_for_llm")
58
59	result = p.run(data={
60	"message_producer": {
61	"messages":[ChatMessage.from_user("Generate JSON for person with name 'John' and age 30")]},
62	"schema_validator": {
63	"json_schema": {
64	"type": "object",
65	"properties": {"name": {"type": "string"},
66	"age": {"type": "integer"}
67	}
68	}
69	}
70	})
71	print(result)
72	>> {'schema_validator': {'validated': [ChatMessage(content='\\n{\\n "name": "John",\\n "age": 30\\n}',
73	role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4-1106-preview', 'index': 0,
74	'finish_reason': 'stop', 'usage': {'completion_tokens': 17, 'prompt_tokens': 20, 'total_tokens': 37}})]}}
75	```
76	"""
77
78	# Default error description template
79	default_error_template = (	×
80	"The JSON content in the next message does not conform to the provided schema.\n"
81	"Error details:\n- Message: {error_message}\n"
82	"- Error Path in JSON: {error_path}\n"
83	"- Schema Path: {error_schema_path}\n"
84	"Please match the following schema:\n"
85	"{json_schema}\n"
86	"and provide the corrected JSON content ONLY."
87	)
88
89	def __init__(self, json_schema: Optional[Dict[str, Any]] = None, error_template: Optional[str] = None):	×
90	"""
91	Initialize the JsonSchemaValidator component.
92
93	:param json_schema: A dictionary representing the [JSON schema](https://json-schema.org/) against which
94	the messages' content is validated.
95	:param error_template: A custom template string for formatting the error message in case of validation failure.
96	"""
97	jsonschema_import.check()	×
98	self.json_schema = json_schema	×
99	self.error_template = error_template	×
100
101	@component.output_types(validated=List[ChatMessage], validation_error=List[ChatMessage])	×
102	def run(	×
103	self,
104	messages: List[ChatMessage],
105	json_schema: Optional[Dict[str, Any]] = None,
106	error_template: Optional[str] = None,
107	) -> Dict[str, List[ChatMessage]]:
108	"""
109	Validates the last of the provided messages against the specified json schema.
110
111	If it does, the message is passed along the "validated" output. If it does not, the message is passed along
112	the "validation_error" output.
113
114	:param messages: A list of ChatMessage instances to be validated. The last message in this list is the one
115	that is validated.
116	:param json_schema: A dictionary representing the [JSON schema](https://json-schema.org/)
117	against which the messages' content is validated. If not provided, the schema from the component init
118	is used.
119	:param error_template: A custom template string for formatting the error message in case of validation. If not
120	provided, the `error_template` from the component init is used.
121	:return: A dictionary with the following keys:
122	- "validated": A list of messages if the last message is valid.
123	- "validation_error": A list of messages if the last message is invalid.
124	:raises ValueError: If no JSON schema is provided or if the message content is not a dictionary or a list of
125	dictionaries.
126	"""
127	last_message = messages[-1]	×
128	last_message_content = json.loads(last_message.content)	×
129
130	json_schema = json_schema or self.json_schema	×
131	error_template = error_template or self.error_template or self.default_error_template	×
132
133	if not json_schema:	×
134	raise ValueError("Provide a JSON schema for validation either in the run method or in the component init.")	×
135
136	# fc payload is json object but subtree `parameters` is string - we need to convert to json object
137	# we need complete json to validate it against schema
138	last_message_json = self._recursive_json_to_object(last_message_content)	×
139	using_openai_schema: bool = self._is_openai_function_calling_schema(json_schema)	×
140	if using_openai_schema:	×
141	validation_schema = json_schema["parameters"]	×
142	else:
143	validation_schema = json_schema	×
144	try:	×
145	last_message_json = [last_message_json] if not isinstance(last_message_json, list) else last_message_json	×
146	for content in last_message_json:	×
147	if using_openai_schema:	×
148	validate(instance=content["function"]["arguments"], schema=validation_schema)	×
149	else:
150	validate(instance=content, schema=validation_schema)	×
151
152	return {"validated": messages}	×
153	except ValidationError as e:	×
154	error_path = " -> ".join(map(str, e.absolute_path)) if e.absolute_path else "N/A"	×
155	error_schema_path = " -> ".join(map(str, e.absolute_schema_path)) if e.absolute_schema_path else "N/A"	×
156
157	error_template = error_template or self.default_error_template	×
158
159	recovery_prompt = self._construct_error_recovery_message(	×
160	error_template, str(e), error_path, error_schema_path, validation_schema
161	)
162	complete_message_list = [ChatMessage.from_user(recovery_prompt)] + messages	×
163	return {"validation_error": complete_message_list}	×
164
165	def _construct_error_recovery_message(	×
166	self,
167	error_template: str,
168	error_message: str,
169	error_path: str,
170	error_schema_path: str,
171	json_schema: Dict[str, Any],
172	) -> str:
173	"""
174	Constructs an error recovery message using a specified template or the default one if none is provided.
175
176	:param error_template: A custom template string for formatting the error message in case of validation failure.
177	:param error_message: The error message returned by the JSON schema validator.
178	:param error_path: The path in the JSON content where the error occurred.
179	:param error_schema_path: The path in the JSON schema where the error occurred.
180	:param json_schema: The JSON schema against which the content is validated.
181	"""
182	error_template = error_template or self.default_error_template	×
183
184	return error_template.format(	×
185	error_message=error_message,
186	error_path=error_path,
187	error_schema_path=error_schema_path,
188	json_schema=json_schema,
189	)
190
191	def _is_openai_function_calling_schema(self, json_schema: Dict[str, Any]) -> bool:	×
192	"""
193	Checks if the provided schema is a valid OpenAI function calling schema.
194
195	:param json_schema: The JSON schema to check
196	:return: `True` if the schema is a valid OpenAI function calling schema; otherwise, `False`.
197	"""
198	return all(key in json_schema for key in ["name", "description", "parameters"])	×
199
200	def _recursive_json_to_object(self, data: Any) -> Any:	×
201	"""
202	Convert any string values that are valid JSON objects into dictionary objects.
203
204	Returns a new data structure.
205
206	:param data: The data structure to be traversed.
207	:return: A new data structure with JSON strings converted to dictionary objects.
208	"""
209	if isinstance(data, list):	×
210	return [self._recursive_json_to_object(item) for item in data]	×
211
212	if isinstance(data, dict):	×
213	new_dict = {}	×
214	for key, value in data.items():	×
215	if isinstance(value, str):	×
216	try:	×
217	json_value = json.loads(value)	×
218	new_dict[key] = (	×
219	self._recursive_json_to_object(json_value)
220	if isinstance(json_value, (dict, list))
221	else json_value
222	)
223	except json.JSONDecodeError:	×
224	new_dict[key] = value	×
225	elif isinstance(value, dict):	×
226	new_dict[key] = self._recursive_json_to_object(value)	×
227	else:
228	new_dict[key] = value	×
229	return new_dict	×
230
231	# If it's neither a list nor a dictionary, return the value directly
232	raise ValueError("Input must be a dictionary or a list of dictionaries.")	×

deepset-ai / haystack / 9568249476

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous