• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 9568249476

18 Jun 2024 03:52PM UTC coverage: 89.872% (-0.1%) from 89.995%
9568249476

push

github

web-flow
ci: Add code formatting checks  (#7882)

* ruff settings

enable ruff format and re-format outdated files

feat: `EvaluationRunResult` add parameter to specify columns to keep in the comparative `Dataframe`  (#7879)

* adding param to explicitly state which cols to keep

* adding param to explicitly state which cols to keep

* adding param to explicitly state which cols to keep

* updating tests

* adding release notes

* Update haystack/evaluation/eval_run_result.py

Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>

* Update releasenotes/notes/add-keep-columns-to-EvalRunResult-comparative-be3e15ce45de3e0b.yaml

Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>

* updating docstring

---------

Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>

add format-check

fail on format and linting failures

fix string formatting

reformat long lines

fix tests

fix typing

linter

pull from main

* reformat

* lint -> check

* lint -> check

6957 of 7741 relevant lines covered (89.87%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
haystack/components/validators/json_schema.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import json
×
6
from typing import Any, Dict, List, Optional
×
7

8
from haystack import component
×
9
from haystack.dataclasses import ChatMessage
×
10
from haystack.lazy_imports import LazyImport
×
11

12
with LazyImport(message="Run 'pip install jsonschema'") as jsonschema_import:
×
13
    from jsonschema import ValidationError, validate
×
14

15

16
@component
class JsonSchemaValidator:
    """
    Validates JSON content of `ChatMessage` against a specified [JSON Schema](https://json-schema.org/).

    If JSON content of a message conforms to the provided schema, the message is passed along the "validated" output.
    If the JSON content does not conform to the schema, the message is passed along the "validation_error" output.
    In the latter case, the error message is constructed using the provided `error_template` or a default template.
    These error ChatMessages can be used by LLMs in Haystack 2.x recovery loops.

    Usage example:

    ```python
    from typing import List

    from haystack import Pipeline
    from haystack.components.generators.chat import OpenAIChatGenerator
    from haystack.components.others import Multiplexer
    from haystack.components.validators import JsonSchemaValidator
    from haystack import component
    from haystack.dataclasses import ChatMessage


    @component
    class MessageProducer:

        @component.output_types(messages=List[ChatMessage])
        def run(self, messages: List[ChatMessage]) -> dict:
            return {"messages": messages}


    p = Pipeline()
    p.add_component("llm", OpenAIChatGenerator(model="gpt-4-1106-preview",
                                               generation_kwargs={"response_format": {"type": "json_object"}}))
    p.add_component("schema_validator", JsonSchemaValidator())
    p.add_component("mx_for_llm", Multiplexer(List[ChatMessage]))
    p.add_component("message_producer", MessageProducer())

    p.connect("message_producer.messages", "mx_for_llm")
    p.connect("mx_for_llm", "llm")
    p.connect("llm.replies", "schema_validator.messages")
    p.connect("schema_validator.validation_error", "mx_for_llm")

    result = p.run(data={
        "message_producer": {
            "messages": [ChatMessage.from_user("Generate JSON for person with name 'John' and age 30")]
        },
        "schema_validator": {
            "json_schema": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"}
                }
            }
        }
    })
    print(result)
    >> {'schema_validator': {'validated': [ChatMessage(content='\\n{\\n  "name": "John",\\n  "age": 30\\n}',
    role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4-1106-preview', 'index': 0,
    'finish_reason': 'stop', 'usage': {'completion_tokens': 17, 'prompt_tokens': 20, 'total_tokens': 37}})]}}
    ```
    """

    # Default error description template
    default_error_template = (
        "The JSON content in the next message does not conform to the provided schema.\n"
        "Error details:\n- Message: {error_message}\n"
        "- Error Path in JSON: {error_path}\n"
        "- Schema Path: {error_schema_path}\n"
        "Please match the following schema:\n"
        "{json_schema}\n"
        "and provide the corrected JSON content ONLY."
    )

    def __init__(self, json_schema: Optional[Dict[str, Any]] = None, error_template: Optional[str] = None):
        """
        Initialize the JsonSchemaValidator component.

        :param json_schema: A dictionary representing the [JSON schema](https://json-schema.org/) against which
            the messages' content is validated.
        :param error_template: A custom template string for formatting the error message in case of validation failure.
        """
        jsonschema_import.check()
        self.json_schema = json_schema
        self.error_template = error_template

    @component.output_types(validated=List[ChatMessage], validation_error=List[ChatMessage])
    def run(
        self,
        messages: List[ChatMessage],
        json_schema: Optional[Dict[str, Any]] = None,
        error_template: Optional[str] = None,
    ) -> Dict[str, List[ChatMessage]]:
        """
        Validates the last of the provided messages against the specified json schema.

        If the message conforms to the schema, the messages are passed along the "validated" output. If it does not
        (or if its content is not parseable JSON at all), a recovery prompt followed by the original messages is
        passed along the "validation_error" output.

        :param messages: A list of ChatMessage instances to be validated. The last message in this list is the one
            that is validated.
        :param json_schema: A dictionary representing the [JSON schema](https://json-schema.org/)
            against which the messages' content is validated. If not provided, the schema from the component init
            is used.
        :param error_template: A custom template string for formatting the error message in case of validation
            failure. If not provided, the `error_template` from the component init is used.
        :return:  A dictionary with the following keys:
            - "validated": A list of messages if the last message is valid.
            - "validation_error": A list of messages if the last message is invalid.
        :raises ValueError: If no JSON schema is provided or if the message content is not a dictionary or a list of
            dictionaries.
        """
        json_schema = json_schema or self.json_schema
        error_template = error_template or self.error_template or self.default_error_template

        # Check the configuration first so a missing schema is reported as such,
        # regardless of what the message happens to contain.
        if not json_schema:
            raise ValueError("Provide a JSON schema for validation either in the run method or in the component init.")

        using_openai_schema: bool = self._is_openai_function_calling_schema(json_schema)
        validation_schema = json_schema["parameters"] if using_openai_schema else json_schema

        last_message = messages[-1]
        try:
            last_message_content = json.loads(last_message.content)
        except json.JSONDecodeError as e:
            # Content that is not JSON at all is a validation failure too: route it to the recovery
            # output instead of letting the decode error abort the pipeline.
            recovery_prompt = self._construct_error_recovery_message(
                error_template, f"Invalid JSON: {e}", "N/A", "N/A", validation_schema
            )
            return {"validation_error": [ChatMessage.from_user(recovery_prompt)] + messages}

        # fc payload is json object but subtree `parameters` is string - we need to convert to json object
        # we need complete json to validate it against schema
        last_message_json = self._recursive_json_to_object(last_message_content)
        if not isinstance(last_message_json, list):
            last_message_json = [last_message_json]

        try:
            for content in last_message_json:
                if using_openai_schema:
                    validate(instance=content["function"]["arguments"], schema=validation_schema)
                else:
                    validate(instance=content, schema=validation_schema)
        except ValidationError as e:
            error_path = " -> ".join(map(str, e.absolute_path)) if e.absolute_path else "N/A"
            error_schema_path = " -> ".join(map(str, e.absolute_schema_path)) if e.absolute_schema_path else "N/A"

            recovery_prompt = self._construct_error_recovery_message(
                error_template, str(e), error_path, error_schema_path, validation_schema
            )
            # The recovery prompt goes first because the template refers to "the next message".
            complete_message_list = [ChatMessage.from_user(recovery_prompt)] + messages
            return {"validation_error": complete_message_list}

        return {"validated": messages}

    def _construct_error_recovery_message(
        self,
        error_template: str,
        error_message: str,
        error_path: str,
        error_schema_path: str,
        json_schema: Dict[str, Any],
    ) -> str:
        """
        Constructs an error recovery message using a specified template or the default one if none is provided.

        :param error_template: A custom template string for formatting the error message in case of validation failure.
        :param error_message: The error message returned by the JSON schema validator.
        :param error_path: The path in the JSON content where the error occurred.
        :param error_schema_path: The path in the JSON schema where the error occurred.
        :param json_schema: The JSON schema against which the content is validated.
        :return: The template with its `error_message`, `error_path`, `error_schema_path` and `json_schema`
            placeholders filled in.
        """
        error_template = error_template or self.default_error_template

        return error_template.format(
            error_message=error_message,
            error_path=error_path,
            error_schema_path=error_schema_path,
            json_schema=json_schema,
        )

    def _is_openai_function_calling_schema(self, json_schema: Dict[str, Any]) -> bool:
        """
        Checks if the provided schema is a valid OpenAI function calling schema.

        The check is purely structural: the schema must contain the keys "name", "description" and "parameters".

        :param json_schema: The JSON schema to check
        :return: `True` if the schema is a valid OpenAI function calling schema; otherwise, `False`.
        """
        return all(key in json_schema for key in ["name", "description", "parameters"])

    def _recursive_json_to_object(self, data: Any) -> Any:
        """
        Convert any string values that are valid JSON objects or arrays into dictionaries or lists.

        Only strings that decode to a container (dict or list) are replaced. Strings that merely look like JSON
        scalars (for example "30", "true" or "null") are kept as strings, so that string-typed fields still
        validate as strings. Returns a new data structure.

        :param data: The data structure to be traversed.
        :return: A new data structure with JSON strings converted to dictionary or list objects.
        :raises ValueError: If `data` is neither a dictionary nor a list of dictionaries.
        """

        def convert_decoded(node: Any) -> Any:
            # Content decoded from a JSON string may legitimately hold scalars (e.g. "[1, 2]"),
            # so unlike the top-level input it must not be rejected for not being a dictionary.
            if isinstance(node, dict):
                return self._recursive_json_to_object(node)
            if isinstance(node, list):
                return [convert_decoded(item) for item in node]
            return node

        if isinstance(data, list):
            return [self._recursive_json_to_object(item) for item in data]

        if isinstance(data, dict):
            new_dict = {}
            for key, value in data.items():
                if isinstance(value, str):
                    try:
                        json_value = json.loads(value)
                    except json.JSONDecodeError:
                        new_dict[key] = value
                        continue
                    if isinstance(json_value, (dict, list)):
                        new_dict[key] = convert_decoded(json_value)
                    else:
                        # Preserve the original string rather than coercing it to int/float/bool/None.
                        new_dict[key] = value
                elif isinstance(value, dict):
                    new_dict[key] = self._recursive_json_to_object(value)
                else:
                    new_dict[key] = value
            return new_dict

        # Top-level input that is neither a list nor a dictionary cannot be validated as a JSON document here.
        raise ValueError("Input must be a dictionary or a list of dictionaries.")
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc