• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 18466230493

13 Oct 2025 12:45PM UTC coverage: 92.025% (-0.04%) from 92.061%
18466230493

Pull #9869

github

web-flow
Merge a61227cef into 18b6482e2
Pull Request #9869: WIP: Fix serialization and deserialization of Enum type

13213 of 14358 relevant lines covered (92.03%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.57
haystack/utils/base_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any
1✔
6

7
from haystack.core.errors import DeserializationError, SerializationError
1✔
8
from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
1✔
9
from haystack.utils import deserialize_callable, serialize_callable
1✔
10

11
# Maps Python primitive types to the schema type names stored under the "type" key of a serialization schema.
# NOTE: `bool` must stay ahead of `int`. `_primitive_schema_type` returns the first `isinstance` match in
# insertion order, and `isinstance(True, int)` is also true, so reordering would label booleans as "integer".
_PRIMITIVE_TO_SCHEMA_MAP = {type(None): "null", bool: "boolean", int: "integer", float: "number", str: "string"}
1✔
12

13

14
def serialize_class_instance(obj: Any) -> dict[str, Any]:
    """
    Wraps the `to_dict` output of an object together with the name of its class.

    :param obj:
        The object to serialize. It must expose a `to_dict` attribute.
    :returns:
        A dictionary with two keys: `type`, holding the value returned by `generate_qualified_class_name`
        for the class of `obj`, and `data`, holding whatever `obj.to_dict()` returned.
    :raises SerializationError:
        If `obj` has no `to_dict` attribute.
    """
    if hasattr(obj, "to_dict"):
        # Produce the payload first, then resolve the class name, mirroring the order callers observe.
        serialized_data = obj.to_dict()
        return {"type": generate_qualified_class_name(type(obj)), "data": serialized_data}

    raise SerializationError(f"Object of class '{type(obj).__name__}' does not have a 'to_dict' method")
1✔
30

31

32
def deserialize_class_instance(data: dict[str, Any]) -> Any:
    """
    Rebuilds an object from the `{"type": ..., "data": ...}` dictionary produced by `serialize_class_instance`.

    :param data:
        The dictionary to deserialize from. It must contain the keys `type` (a class name that
        `import_class_by_name` can resolve) and `data` (the payload handed to the class's `from_dict`).
    :returns:
        Whatever the resolved class's `from_dict` returns for `data["data"]`.
    :raises DeserializationError:
        If either key is missing, if importing the class raises `ImportError`, or if the class has no
        `from_dict` attribute.
    """
    if "type" not in data:
        raise DeserializationError("Missing 'type' in serialization data")
    if "data" not in data:
        raise DeserializationError("Missing 'data' in serialization data")

    try:
        target_class = import_class_by_name(data["type"])
    except ImportError as import_error:
        # Keep the original ImportError attached as the cause for easier debugging.
        raise DeserializationError(f"Class '{data['type']}' not correctly imported") from import_error

    if hasattr(target_class, "from_dict"):
        return target_class.from_dict(data["data"])

    raise DeserializationError(f"Class '{data['type']}' does not have a 'from_dict' method")
1✔
58

59

60
def _serialize_value_with_schema(payload: Any) -> dict[str, Any]:
    """
    Serializes a value into a schema-aware format suitable for storage or transmission.

    The output format separates the schema information from the actual data, making it easier
    to deserialize complex nested structures correctly.

    The checks run in this order, and the first match wins:
    - Dictionaries (each value is serialized recursively)
    - Lists, tuples, and sets. The item schema is taken from the first element only, so collections
      with mixed element types are not supported.
    - Objects with a callable `to_dict` attribute (e.g. dataclasses and Components)
    - Callables that are not classes
    - Objects with a `__dict__` attribute
    - Everything else is treated as a primitive (str, int, float, bool, None)

    :param payload: The value to serialize (can be any type)
    :returns: The serialized dict representation of the given value. Contains two keys:
        - "serialization_schema": Contains type information for each field.
        - "serialized_data": Contains the actual data in a simplified format.

    """
    # Handle dictionary case - iterate through fields
    if isinstance(payload, dict):
        properties: dict[str, Any] = {}
        data: dict[str, Any] = {}

        for field, val in payload.items():
            # Recursively serialize each field
            serialized_value = _serialize_value_with_schema(val)
            properties[field] = serialized_value["serialization_schema"]
            data[field] = serialized_value["serialized_data"]

        return {"serialization_schema": {"type": "object", "properties": properties}, "serialized_data": data}

    # Handle array case - iterate through elements
    if isinstance(payload, (list, tuple, set)):
        # Serialize every element exactly once. The schema of the first element is reused as the item
        # schema instead of iterating the payload again and serializing that element a second time.
        serialized_items = [_serialize_value_with_schema(item) for item in payload]
        item_schema = serialized_items[0]["serialization_schema"] if serialized_items else {}
        array_schema: dict[str, Any] = {"type": "array", "items": item_schema}

        # Add JSON Schema properties so that sets and tuples can be told apart from lists on deserialization
        if isinstance(payload, set):
            array_schema["uniqueItems"] = True
        elif isinstance(payload, tuple):
            array_schema["minItems"] = len(payload)
            array_schema["maxItems"] = len(payload)

        return {
            "serialization_schema": array_schema,
            "serialized_data": [entry["serialized_data"] for entry in serialized_items],
        }

    # Handle Haystack style objects (e.g. dataclasses and Components)
    if hasattr(payload, "to_dict") and callable(payload.to_dict):
        type_name = generate_qualified_class_name(type(payload))
        return {"serialization_schema": {"type": type_name}, "serialized_data": payload.to_dict()}

    # Handle callable functions serialization (classes are callable too, so they are excluded here)
    if callable(payload) and not isinstance(payload, type):
        serialized = serialize_callable(payload)
        return {"serialization_schema": {"type": "typing.Callable"}, "serialized_data": serialized}

    # Handle arbitrary objects with __dict__
    if hasattr(payload, "__dict__"):
        type_name = generate_qualified_class_name(type(payload))
        # Only the serialized data of each attribute is kept; the per-attribute schemas are dropped.
        serialized_data = {
            key: _serialize_value_with_schema(value)["serialized_data"] for key, value in vars(payload).items()
        }
        return {"serialization_schema": {"type": type_name}, "serialized_data": serialized_data}

    # Handle primitives
    return {"serialization_schema": {"type": _primitive_schema_type(payload)}, "serialized_data": payload}
1✔
143

144

145
def _primitive_schema_type(value: Any) -> str:
    """
    Looks up the schema type name for a primitive value.

    Entries of `_PRIMITIVE_TO_SCHEMA_MAP` are checked in insertion order with `isinstance`, and the name
    of the first matching entry is returned. Values matching no entry are reported as "string".
    """
    matching_names = (
        schema_value for py_type, schema_value in _PRIMITIVE_TO_SCHEMA_MAP.items() if isinstance(value, py_type)
    )
    # The generator is lazy, so only the first match is ever evaluated; "string" is the fallback.
    return next(matching_names, "string")
×
153

154

155
def _deserialize_value_with_schema(serialized: dict[str, Any]) -> Any:  # pylint: disable=too-many-return-statements, # noqa: PLR0911, PLR0912
    """
    Deserializes a value with schema information back to its original form.

    Takes a dict of the form:
      {
         "serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}},
         "serialized_data": <the actual data>
      }

    NOTE: For array types we only support homogeneous lists (all elements of the same type), because a
    single "items" schema is applied to every element.

    Array schemas carrying `"uniqueItems": True` are returned as a `set`, schemas carrying both
    "minItems" and "maxItems" are returned as a `tuple`, and all other arrays are returned as a `list`.
    This also holds when the items are themselves arrays or objects.

    :param serialized: The serialized dict with schema and data.
    :returns: The deserialized value in its original form.
    :raises DeserializationError: If `serialized` is empty, lacks one of the two expected keys, or its
        schema has no "type" entry.
    """

    if not serialized or "serialization_schema" not in serialized or "serialized_data" not in serialized:
        raise DeserializationError(
            f"Invalid format of passed serialized payload. Expected a dictionary with keys "
            f"'serialization_schema' and 'serialized_data'. Got: {serialized}"
        )
    schema = serialized["serialization_schema"]
    data = serialized["serialized_data"]

    schema_type = schema.get("type")

    # TODO This should be dropped now that we are at Haystack 2.18
    if not schema_type:
        # A schema without a "type" is treated as the legacy format, which is no longer deserialized:
        # fail loudly rather than guess.
        raise DeserializationError(
            "Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized "
            "State object created with a version of Haystack older than 2.15.0. "
            "Support for the old serialization format is removed in Haystack 2.16.0. "
            "Please upgrade to the new serialization format to ensure forward compatibility."
        )

    # Handle object case (dictionary with properties)
    if schema_type == "object":
        properties = schema.get("properties")
        # TODO In what situation is properties missing if type is object?
        if properties:
            result: dict[str, Any] = {}

            # TODO In what situation is properties present but data is not a dict?
            if isinstance(data, dict):
                for field, raw_value in data.items():
                    field_schema = properties.get(field)
                    # TODO In what situation is field_schema missing? If can be missing we should log a warning
                    #      otherwise we are silently skipping fields
                    if field_schema:
                        # Recursively deserialize each field
                        result[field] = _deserialize_value_with_schema(
                            {"serialization_schema": field_schema, "serialized_data": raw_value}
                        )

            return result
        else:
            # Missing or empty "properties" (e.g. an empty dict was serialized): fall back to the
            # schema-less helper.
            return _deserialize_value(data)

    # Handle array case
    elif schema_type == "array":
        item_schema = schema.get("items", {})
        item_type = item_schema.get("type", "any")
        is_set = schema.get("uniqueItems") is True
        is_tuple = schema.get("minItems") is not None and schema.get("maxItems") is not None

        if item_type in ("object", "array"):
            # Nested containers carry their own schema, so recurse with it
            items = [
                _deserialize_value_with_schema({"serialization_schema": item_schema, "serialized_data": item})
                for item in data
            ]
        elif item_type == "any":
            # No item schema available (e.g. the array was empty when serialized)
            items = [_deserialize_value(item) for item in data]
        else:
            # Primitive, callable or custom class items: wrap each one in a typed envelope
            items = [_deserialize_value({"type": item_type, "data": item}) for item in data]

        # Restore the original collection type. This is applied to nested items as well, so that e.g. a
        # tuple of lists or a set of tuples is not silently turned into a plain list.
        if is_set:
            return set(items)
        if is_tuple:
            return tuple(items)
        return items

    # Handle primitive types
    elif schema_type in _PRIMITIVE_TO_SCHEMA_MAP.values():
        return data

    # Handle callable functions
    elif schema_type == "typing.Callable":
        return deserialize_callable(data)

    # Handle custom class types
    else:
        return _deserialize_value({"type": schema_type, "data": data})
1✔
257

258

259
def _deserialize_value(value: Any) -> Any:  # pylint: disable=too-many-return-statements # noqa: PLR0911
    """
    Helper function to deserialize values from their envelope format {"type": T, "data": D}.

    Handles four cases:
    - Typed envelopes: {"type": T, "data": D} where T determines deserialization method
    - Plain dicts: recursively deserialize values
    - Collections (list/tuple/set): recursively deserialize elements and rebuild the same collection type
      (named tuples are rebuilt through their `_make` constructor)
    - Other values: return as-is

    NOTE: any dict that has both a "type" and a "data" key is treated as an envelope.

    :param value: The value to deserialize
    :returns: The deserialized value

    """
    # 1) Envelope case
    if isinstance(value, dict) and "type" in value and "data" in value:
        value_type = value["type"]
        payload = value["data"]

        # 1.a) Array
        if value_type == "array":
            return [_deserialize_value(child) for child in payload]

        # 1.b) Generic object/dict
        if value_type == "object":
            return {k: _deserialize_value(v) for k, v in payload.items()}

        # 1.c) Primitive
        if value_type in ("null", "boolean", "integer", "number", "string"):
            return payload

        # 1.d) Callable
        if value_type == "typing.Callable":
            return deserialize_callable(payload)

        # 1.e) Custom class
        cls = import_class_by_name(value_type)
        # first, recursively deserialize the inner payload
        deserialized_payload = {k: _deserialize_value(v) for k, v in payload.items()}
        # try from_dict
        if hasattr(cls, "from_dict") and callable(cls.from_dict):
            return cls.from_dict(deserialized_payload)
        # fallback: set attributes on a blank instance (note that `__init__` is not run)
        instance = cls.__new__(cls)
        for attr_name, attr_value in deserialized_payload.items():
            setattr(instance, attr_name, attr_value)
        return instance

    # 2) Plain dict (no envelope) → recurse
    if isinstance(value, dict):
        return {k: _deserialize_value(v) for k, v in value.items()}

    # 3) Collections → recurse
    if isinstance(value, (list, tuple, set)):
        items = [_deserialize_value(v) for v in value]
        collection_type = type(value)
        # Named tuples take their fields as positional arguments, so passing a single iterable to the
        # constructor raises TypeError; their `_make` classmethod accepts an iterable instead.
        make = getattr(collection_type, "_make", None)
        if isinstance(value, tuple) and callable(make):
            return make(items)
        return collection_type(items)

    # 4) Fallback (shouldn't usually happen with our schema)
    return value
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc