• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 19036969662

03 Nov 2025 01:52PM UTC coverage: 92.248% (+0.004%) from 92.244%
19036969662

push

github

web-flow
feat: Add serialization and deserialization of pydantic BaseModels when creating a `PipelineSnapshot` (#9973)

* Add pydantic model serde support

* Add reno

* Use model_validate

13507 of 14642 relevant lines covered (92.25%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.6
haystack/utils/base_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from enum import Enum
1✔
6
from typing import Any, Union
1✔
7

8
import pydantic
1✔
9

10
from haystack import logging
1✔
11
from haystack.core.errors import DeserializationError, SerializationError
1✔
12
from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
1✔
13
from haystack.utils import deserialize_callable, serialize_callable
1✔
14

15
logger = logging.getLogger(__name__)
1✔
16

17
# Maps Python primitive types to the schema type names stored under the "type" key of a
# serialization schema.
# Order matters: `_primitive_schema_type` walks this mapping in insertion order using `isinstance`,
# and `bool` is a subclass of `int`, so `bool` has to come before `int` for True/False to resolve
# to "boolean" rather than "integer".
_PRIMITIVE_TO_SCHEMA_MAP = {type(None): "null", bool: "boolean", int: "integer", float: "number", str: "string"}
1✔
18

19

20
def serialize_class_instance(obj: Any) -> dict[str, Any]:
    """
    Wraps the `to_dict` output of an object in a `{"type": ..., "data": ...}` envelope.

    :param obj:
        The object to serialize. It must expose a `to_dict` attribute.
    :returns:
        A dictionary with the qualified class name of `obj` under `"type"` and the result of
        `obj.to_dict()` under `"data"`.
    :raises SerializationError:
        If the object does not have a `to_dict` method.
    """
    if hasattr(obj, "to_dict"):
        # Produce the data first, then resolve the class name used to locate the class on the way back.
        dumped = obj.to_dict()
        qualified_name = generate_qualified_class_name(type(obj))
        return {"type": qualified_name, "data": dumped}

    raise SerializationError(f"Object of class '{type(obj).__name__}' does not have a 'to_dict' method")
1✔
36

37

38
def deserialize_class_instance(data: dict[str, Any]) -> Any:
    """
    Rebuilds an object from the `{"type": ..., "data": ...}` envelope produced by `serialize_class_instance`.

    :param data:
        The envelope dictionary. `"type"` holds the qualified class name and `"data"` holds the
        dictionary that is handed to the class' `from_dict` method.
    :returns:
        The object returned by `from_dict` of the referenced class.
    :raises DeserializationError:
        If `"type"` or `"data"` is missing, the class cannot be imported, or the class does not
        have a `from_dict` method.
    """
    # Validate the envelope shape before touching any of its values.
    if "type" not in data:
        raise DeserializationError("Missing 'type' in serialization data")
    if "data" not in data:
        raise DeserializationError("Missing 'data' in serialization data")

    try:
        target_class = import_class_by_name(data["type"])
    except ImportError as import_error:
        raise DeserializationError(f"Class '{data['type']}' not correctly imported") from import_error

    if hasattr(target_class, "from_dict"):
        return target_class.from_dict(data["data"])

    raise DeserializationError(f"Class '{data['type']}' does not have a 'from_dict' method")
1✔
64

65

66
def _serialize_value_with_schema(payload: Any) -> dict[str, Any]:  # pylint: disable=too-many-return-statements # noqa: PLR0911
    """
    Serializes a value into a schema-aware format suitable for storage or transmission.

    The output format separates the schema information from the actual data, making it easier
    to deserialize complex nested structures correctly.

    The value is matched against the following cases, in this order of precedence:
    - Pydantic models (serialized with `model_dump()`)
    - Dictionaries (each value is serialized recursively)
    - Lists, tuples, and sets. Mixed-type collections are not supported: the item schema is taken
      from the first element only.
    - Objects with a callable `to_dict` attribute (e.g. Haystack dataclasses and Components)
    - Callables that are not classes (serialized with `serialize_callable`)
    - Enum members (serialized by member name)
    - Arbitrary objects with a `__dict__` attribute
    - Everything else is treated as a primitive (str, int, float, bool, None)

    :param payload: The value to serialize (can be any type)
    :returns: The serialized dict representation of the given value. Contains two keys:
        - "serialization_schema": Contains type information for each field.
        - "serialized_data": Contains the actual data in a simplified format.

    """
    # Handle pydantic
    if isinstance(payload, pydantic.BaseModel):
        type_name = generate_qualified_class_name(type(payload))
        return {"serialization_schema": {"type": type_name}, "serialized_data": payload.model_dump()}

    # Handle dictionary case - iterate through fields
    if isinstance(payload, dict):
        properties: dict[str, Any] = {}
        data: dict[str, Any] = {}

        for field, val in payload.items():
            # Recursively serialize each field
            serialized_value = _serialize_value_with_schema(val)
            properties[field] = serialized_value["serialization_schema"]
            data[field] = serialized_value["serialized_data"]

        return {"serialization_schema": {"type": "object", "properties": properties}, "serialized_data": data}

    # Handle array case - iterate through elements
    if isinstance(payload, (list, tuple, set)):
        serialized_list = []
        # Stays empty for an empty collection.
        item_schema: dict[str, Any] = {}

        for index, item in enumerate(payload):
            serialized_value = _serialize_value_with_schema(item)
            # NOTE: We do not support mixed-type lists, so the schema of the first element describes
            # all items. It is captured here so that the first element is serialized only once.
            if index == 0:
                item_schema = serialized_value["serialization_schema"]
            serialized_list.append(serialized_value["serialized_data"])

        base_schema: dict[str, Any] = {"type": "array", "items": item_schema}

        # Add JSON Schema properties to infer sets and tuples
        if isinstance(payload, set):
            base_schema["uniqueItems"] = True
        elif isinstance(payload, tuple):
            base_schema["minItems"] = len(payload)
            base_schema["maxItems"] = len(payload)

        return {"serialization_schema": base_schema, "serialized_data": serialized_list}

    # Handle Haystack style objects (e.g. dataclasses and Components)
    if hasattr(payload, "to_dict") and callable(payload.to_dict):
        type_name = generate_qualified_class_name(type(payload))
        return {"serialization_schema": {"type": type_name}, "serialized_data": payload.to_dict()}

    # Handle callable functions serialization (classes themselves are excluded)
    if callable(payload) and not isinstance(payload, type):
        serialized = serialize_callable(payload)
        return {"serialization_schema": {"type": "typing.Callable"}, "serialized_data": serialized}

    # Handle Enums - only the member name is stored
    if isinstance(payload, Enum):
        type_name = generate_qualified_class_name(type(payload))
        return {"serialization_schema": {"type": type_name}, "serialized_data": payload.name}

    # Handle arbitrary objects with __dict__
    if hasattr(payload, "__dict__"):
        type_name = generate_qualified_class_name(type(payload))
        serialized_data = {}
        for key, value in vars(payload).items():
            # Only the data of each attribute is kept; its per-attribute schema is discarded.
            serialized_data[key] = _serialize_value_with_schema(value)["serialized_data"]
        return {"serialization_schema": {"type": type_name}, "serialized_data": serialized_data}

    # Handle primitives
    return {"serialization_schema": {"type": _primitive_schema_type(payload)}, "serialized_data": payload}
1✔
160

161

162
def _primitive_schema_type(value: Any) -> str:
    """
    Returns the schema type name of the first entry in `_PRIMITIVE_TO_SCHEMA_MAP` that `value` is an instance of.

    If no entry matches, a warning is logged and "string" is returned.
    """
    matched_name = next(
        (schema_name for primitive, schema_name in _PRIMITIVE_TO_SCHEMA_MAP.items() if isinstance(value, primitive)),
        None,
    )
    if matched_name is not None:
        return matched_name

    logger.warning(
        "Unsupported primitive type '{value_type}', falling back to 'string'", value_type=type(value).__name__
    )
    return "string"  # fallback
×
173

174

175
def _deserialize_value_with_schema(serialized: dict[str, Any]) -> Any:
1✔
176
    """
177
    Deserializes a value with schema information back to its original form.
178

179
    Takes a dict of the form:
180
      {
181
         "serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}},
182
         "serialized_data": <the actual data>
183
      }
184

185
    NOTE: For array types we only support homogeneous lists (all elements of the same type).
186

187
    :param serialized: The serialized dict with schema and data.
188
    :returns: The deserialized value in its original form.
189
    """
190

191
    if not serialized or "serialization_schema" not in serialized or "serialized_data" not in serialized:
1✔
192
        raise DeserializationError(
×
193
            f"Invalid format of passed serialized payload. Expected a dictionary with keys "
194
            f"'serialization_schema' and 'serialized_data'. Got: {serialized}"
195
        )
196
    schema = serialized["serialization_schema"]
1✔
197
    data = serialized["serialized_data"]
1✔
198

199
    schema_type = schema.get("type")
1✔
200

201
    if not schema_type:
1✔
202
        # for backward compatibility till Haystack 2.16 we use legacy implementation
203
        raise DeserializationError(
×
204
            "Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized "
205
            "State object created with a version of Haystack older than 2.15.0. "
206
            "Support for the old serialization format is removed in Haystack 2.16.0. "
207
            "Please upgrade to the new serialization format to ensure forward compatibility."
208
        )
209

210
    # Handle object case (dictionary with properties)
211
    if schema_type == "object":
1✔
212
        properties = schema["properties"]
1✔
213
        result: dict[str, Any] = {}
1✔
214
        for field, raw_value in data.items():
1✔
215
            field_schema = properties[field]
1✔
216
            # Recursively deserialize each field - avoid creating temporary dict
217
            result[field] = _deserialize_value_with_schema(
1✔
218
                {"serialization_schema": field_schema, "serialized_data": raw_value}
219
            )
220
        return result
1✔
221

222
    # Handle array case
223
    if schema_type == "array":
1✔
224
        # Deserialize each item
225
        deserialized_items = [
1✔
226
            _deserialize_value_with_schema({"serialization_schema": schema["items"], "serialized_data": item})
227
            for item in data
228
        ]
229
        final_array: Union[list, set, tuple]
230
        # Is a set if uniqueItems is True
231
        if schema.get("uniqueItems") is True:
1✔
232
            final_array = set(deserialized_items)
1✔
233
        # Is a tuple if minItems and maxItems are set
234
        elif schema.get("minItems") is not None and schema.get("maxItems") is not None:
1✔
235
            final_array = tuple(deserialized_items)
1✔
236
        else:
237
            # Otherwise, it's a list
238
            final_array = list(deserialized_items)
1✔
239
        return final_array
1✔
240

241
    # Handle primitive types
242
    if schema_type in _PRIMITIVE_TO_SCHEMA_MAP.values():
1✔
243
        return data
1✔
244

245
    # Handle callable functions
246
    if schema_type == "typing.Callable":
1✔
247
        return deserialize_callable(data)
1✔
248

249
    # Handle custom class types
250
    return _deserialize_value({"type": schema_type, "data": data})
1✔
251

252

253
def _deserialize_value(value: dict[str, Any]) -> Any:
    """
    Helper function to deserialize values from their envelope format {"type": T, "data": D}.

    The class named by T is imported and the data is turned into an instance using the first
    applicable strategy:
    - Custom classes with a callable `from_dict` (e.g. Haystack dataclasses and Components)
    - Pydantic models (via `model_validate`)
    - Enums (looked up by member name)
    - Fallback for arbitrary classes: a blank instance is created with `cls.__new__` (so `__init__`
      is not run) and every entry of D is set as an attribute. Entries that are themselves
      {"type", "data"} envelopes are deserialized recursively; all other entries are assigned as-is.

    :param value: The envelope to deserialize. Must contain the keys "type" and "data".
    :returns:
        The deserialized value
    :raises DeserializationError:
        If the data is not valid for the Pydantic model or Enum, or if the fallback is reached
        with data that is not a dictionary.

    Exceptions raised by `import_class_by_name` are not caught here and propagate to the caller.
    """
    value_type = value["type"]
    payload = value["data"]

    # Custom class where value_type is a qualified class name
    cls = import_class_by_name(value_type)

    # try from_dict (e.g. Haystack dataclasses and Components)
    if hasattr(cls, "from_dict") and callable(cls.from_dict):
        return cls.from_dict(payload)

    # handle pydantic models
    if issubclass(cls, pydantic.BaseModel):
        try:
            return cls.model_validate(payload)
        except Exception as e:
            raise DeserializationError(
                f"Failed to deserialize data '{payload}' into Pydantic model '{value_type}'"
            ) from e

    # handle enum types
    if issubclass(cls, Enum):
        try:
            return cls[payload]
        except Exception as e:
            raise DeserializationError(f"Value '{payload}' is not a valid member of Enum '{value_type}'") from e

    # fallback: set attributes on a blank instance
    if not isinstance(payload, dict):
        raise DeserializationError(
            f"Cannot deserialize data '{payload}' into class '{value_type}': expected a dictionary of attributes"
        )

    instance = cls.__new__(cls)
    for attr_name, attr_value in payload.items():
        # `_serialize_value_with_schema` stores the attributes of `__dict__` objects as plain data, not
        # as envelopes, so recursing unconditionally would fail on every primitive attribute.
        # Only envelope-shaped values are deserialized further.
        is_envelope = isinstance(attr_value, dict) and "type" in attr_value and "data" in attr_value
        setattr(instance, attr_name, _deserialize_value(attr_value) if is_envelope else attr_value)
    return instance
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc