• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 18467750515

13 Oct 2025 01:41PM UTC coverage: 92.053% (-0.008%) from 92.061%
18467750515

Pull #9869

github

web-flow
Merge f11884ddb into 18b6482e2
Pull Request #9869: WIP: Fix serialization and deserialization of Enum type

13193 of 14332 relevant lines covered (92.05%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.11
haystack/utils/base_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any
1✔
6

7
from haystack.core.errors import DeserializationError, SerializationError
1✔
8
from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
1✔
9
from haystack.utils import deserialize_callable, serialize_callable
1✔
10

11
# Maps Python primitive types to the schema type names stored under "type" in a serialization schema.
# Entry order matters: `_primitive_schema_type` returns the first entry whose type passes `isinstance`, and
# `bool` is a subclass of `int`, so `bool` has to come before `int` for booleans to be reported as "boolean".
_PRIMITIVE_TO_SCHEMA_MAP = {type(None): "null", bool: "boolean", int: "integer", float: "number", str: "string"}
1✔
12

13

14
def serialize_class_instance(obj: Any) -> dict[str, Any]:
    """
    Wraps the `to_dict` output of an object together with its qualified class name.

    :param obj:
        The object to serialize. It must expose a `to_dict` method.
    :returns:
        A dictionary with two keys: "type" (the qualified class name of `obj`) and "data"
        (whatever `obj.to_dict()` returned).
    :raises SerializationError:
        If the object does not have a `to_dict` method.
    """
    obj_type = type(obj)

    if hasattr(obj, "to_dict"):
        # Call to_dict() before resolving the class name, so a failure inside to_dict() surfaces first.
        payload = obj.to_dict()
        return {"type": generate_qualified_class_name(obj_type), "data": payload}

    raise SerializationError(f"Object of class '{obj_type.__name__}' does not have a 'to_dict' method")
1✔
30

31

32
def deserialize_class_instance(data: dict[str, Any]) -> Any:
    """
    Rebuilds an object from the dictionary representation produced by `serialize_class_instance`.

    :param data:
        The dictionary to deserialize from. It must contain a "type" key holding the qualified
        class name and a "data" key holding the payload passed to the class's `from_dict`.
    :returns:
        The object returned by `from_dict` of the imported class.
    :raises DeserializationError:
        If "type" or "data" is missing, the class cannot be imported, or the class does not
        have a `from_dict` method.
    """
    if "type" not in data:
        raise DeserializationError("Missing 'type' in serialization data")
    if "data" not in data:
        raise DeserializationError("Missing 'data' in serialization data")

    qualified_name = data["type"]
    try:
        target_class = import_class_by_name(qualified_name)
    except ImportError as import_error:
        # Re-raise as a deserialization failure while keeping the import error as the cause.
        raise DeserializationError(f"Class '{qualified_name}' not correctly imported") from import_error

    if hasattr(target_class, "from_dict"):
        return target_class.from_dict(data["data"])

    raise DeserializationError(f"Class '{qualified_name}' does not have a 'from_dict' method")
1✔
58

59

60
def _serialize_value_with_schema(payload: Any) -> dict[str, Any]:
    """
    Serializes a value into a schema-aware format suitable for storage or transmission.

    The output format separates the schema information from the actual data, making it easier
    to deserialize complex nested structures correctly.

    The branches are tried in this order, and the first match wins:

    - Dictionaries: every value is serialized recursively; the schema has type "object".
    - Lists, tuples, and sets: every element is serialized recursively; the schema has type "array".
      The item schema is taken from the first element only, so lists with mixed types are not supported.
      Sets are marked with `uniqueItems`, tuples with equal `minItems`/`maxItems`.
    - Objects with a callable `to_dict` (e.g. Haystack dataclasses and Components).
    - Callables that are not classes, serialized with `serialize_callable`.
    - Objects with a `__dict__`: attributes are serialized recursively, but only their data is kept;
      the per-attribute schemas are discarded.
    - Everything else is treated as a primitive (str, int, float, bool, None) and stored unchanged.

    :param payload: The value to serialize (can be any type)
    :returns: The serialized dict representation of the given value. Contains two keys:
        - "serialization_schema": Contains type information for each field.
        - "serialized_data": Contains the actual data in a simplified format.

    """
    # Handle dictionary case - iterate through fields
    if isinstance(payload, dict):
        schema: dict[str, Any] = {}
        data: dict[str, Any] = {}

        for field, val in payload.items():
            # Recursively serialize each field
            serialized_value = _serialize_value_with_schema(val)
            schema[field] = serialized_value["serialization_schema"]
            data[field] = serialized_value["serialized_data"]

        return {"serialization_schema": {"type": "object", "properties": schema}, "serialized_data": data}

    # Handle array case - iterate through elements
    elif isinstance(payload, (list, tuple, set)):
        # Serialize every item exactly once and keep the full results, so the schema of the first
        # item can be reused below instead of serializing that item a second time.
        serialized_items = [_serialize_value_with_schema(item) for item in payload]

        # Determine item type from first element (if any)
        item_schema = serialized_items[0]["serialization_schema"] if serialized_items else {}
        base_schema: dict[str, Any] = {"type": "array", "items": item_schema}

        # Add JSON Schema properties to infer sets and tuples
        if isinstance(payload, set):
            base_schema["uniqueItems"] = True
        elif isinstance(payload, tuple):
            base_schema["minItems"] = len(payload)
            base_schema["maxItems"] = len(payload)

        serialized_list = [item["serialized_data"] for item in serialized_items]
        return {"serialization_schema": base_schema, "serialized_data": serialized_list}

    # Handle Haystack style objects (e.g. dataclasses and Components)
    elif hasattr(payload, "to_dict") and callable(payload.to_dict):
        type_name = generate_qualified_class_name(type(payload))
        schema = {"type": type_name}
        return {"serialization_schema": schema, "serialized_data": payload.to_dict()}

    # Handle callable functions serialization (classes are callable too, but are excluded here)
    elif callable(payload) and not isinstance(payload, type):
        serialized = serialize_callable(payload)
        return {"serialization_schema": {"type": "typing.Callable"}, "serialized_data": serialized}

    # Handle arbitrary objects with __dict__
    elif hasattr(payload, "__dict__"):
        type_name = generate_qualified_class_name(type(payload))
        schema = {"type": type_name}
        # Only the data of each attribute is kept; the attribute schemas are not stored.
        serialized_data = {
            key: _serialize_value_with_schema(value)["serialized_data"] for key, value in vars(payload).items()
        }
        return {"serialization_schema": schema, "serialized_data": serialized_data}

    # Handle primitives
    else:
        schema = {"type": _primitive_schema_type(payload)}
        return {"serialization_schema": schema, "serialized_data": payload}
1✔
143

144

145
def _primitive_schema_type(value: Any) -> str:
    """
    Return the schema type name for a primitive value.

    The first entry of `_PRIMITIVE_TO_SCHEMA_MAP` whose Python type matches `value` wins. Values
    that match no entry are reported as "string".
    """
    matching_names = (
        schema_name for python_type, schema_name in _PRIMITIVE_TO_SCHEMA_MAP.items() if isinstance(value, python_type)
    )
    return next(matching_names, "string")  # fallback
×
153

154

155
def _deserialize_value_with_schema(serialized: dict[str, Any]) -> Any:
    """
    Rebuilds a value from its schema-aware serialized form.

    The input is a dict of the form:
      {
         "serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}},
         "serialized_data": <the actual data>
      }

    The schema "type" selects how the data is interpreted:

    - "object": a dict whose values are deserialized with the matching entry of "properties".
    - "array": every element is deserialized with the single "items" schema, so only homogeneous
      arrays are supported. The result is a set when "uniqueItems" is True, a tuple when both
      "minItems" and "maxItems" are present, and a list otherwise.
    - a primitive type name: the data is returned unchanged.
    - "typing.Callable": the data is passed to `deserialize_callable`.
    - anything else: treated as a qualified class name and handed to `_deserialize_value`.

    :param serialized: The serialized dict with schema and data.
    :returns: The deserialized value in its original form.
    :raises DeserializationError:
        If `serialized` is empty, lacks one of the two expected keys, or its schema has no "type".
    """
    required_keys = ("serialization_schema", "serialized_data")
    if not serialized or any(key not in serialized for key in required_keys):
        raise DeserializationError(
            f"Invalid format of passed serialized payload. Expected a dictionary with keys "
            f"'serialization_schema' and 'serialized_data'. Got: {serialized}"
        )

    schema = serialized["serialization_schema"]
    data = serialized["serialized_data"]
    schema_type = schema.get("type")

    # TODO Should this be dropped now that we are at Haystack 2.18
    if not schema_type:
        # A schema without a type cannot be interpreted; the message attributes this to old payloads.
        raise DeserializationError(
            "Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized "
            "State object created with a version of Haystack older than 2.15.0. "
            "Support for the old serialization format is removed in Haystack 2.16.0. "
            "Please upgrade to the new serialization format to ensure forward compatibility."
        )

    # Object case: rebuild the dictionary field by field using the per-field schemas
    if schema_type == "object":
        field_schemas = schema["properties"]
        return {
            name: _deserialize_value_with_schema({"serialization_schema": field_schemas[name], "serialized_data": raw})
            for name, raw in data.items()
        }

    # Array case: deserialize the elements, then pick the container type from the schema hints
    if schema_type == "array":
        elements = []
        for element in data:
            elements.append(
                _deserialize_value_with_schema({"serialization_schema": schema["items"], "serialized_data": element})
            )

        # uniqueItems marks a set
        if schema.get("uniqueItems") is True:
            return set(elements)
        # The presence of both minItems and maxItems marks a tuple
        if schema.get("minItems") is not None and schema.get("maxItems") is not None:
            return tuple(elements)
        # Otherwise it is a plain list
        return elements

    # Primitive values are stored as-is
    if schema_type in _PRIMITIVE_TO_SCHEMA_MAP.values():
        return data

    # Callable functions
    if schema_type == "typing.Callable":
        return deserialize_callable(data)

    # Anything else is a custom class identified by its qualified name
    return _deserialize_value({"type": schema_type, "data": data})
1✔
231

232

233
def _deserialize_value(value: dict[str, Any]) -> Any:
    """
    Helper function to deserialize values from their envelope format {"type": T, "data": D}.

    The class named by "type" is imported. If it has a callable `from_dict`, the result of
    `from_dict(data)` is returned. Otherwise a blank instance is created with `__new__` (so
    `__init__` is not run) and every entry of "data" is set on it as an attribute.

    In that fallback, attribute values that are themselves envelopes (dicts containing both "type"
    and "data") are deserialized recursively; every other value is assigned unchanged. Raw values
    must be accepted because `_serialize_value_with_schema` stores the attributes of plain objects
    without an envelope.

    :param value: The value to deserialize
    :returns:
        The deserialized value
    """
    # 1) Envelope case
    value_type = value["type"]
    payload = value["data"]

    # Custom class where value_type is a qualified class name
    cls = import_class_by_name(value_type)

    # try from_dict (e.g. Haystack dataclasses and Components)
    if hasattr(cls, "from_dict") and callable(cls.from_dict):
        return cls.from_dict(payload)

    # TODO If we reach this point we should probably log a warning that from_dict is missing and recommend to users
    #      to implement it for their custom classes
    # fallback: set attributes on a blank instance
    deserialized_payload = {
        # Recurse only into envelope-shaped dicts; indexing a primitive or plain dict would raise.
        attr_name: (
            _deserialize_value(attr_value)
            if isinstance(attr_value, dict) and "type" in attr_value and "data" in attr_value
            else attr_value
        )
        for attr_name, attr_value in payload.items()
    }
    instance = cls.__new__(cls)
    for attr_name, attr_value in deserialized_payload.items():
        setattr(instance, attr_name, attr_value)
    return instance
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc