• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 18490576998

14 Oct 2025 08:35AM UTC coverage: 92.058% (-0.003%) from 92.061%
18490576998

Pull #9869

github

web-flow
Merge 962d3ac4c into 18b6482e2
Pull Request #9869: feat: Add serialization and deserialization of Enum type when creating a `PipelineSnapshot`

13202 of 14341 relevant lines covered (92.06%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.18
haystack/utils/base_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from enum import Enum
1✔
6
from typing import Any, Union
1✔
7

8
from haystack.core.errors import DeserializationError, SerializationError
1✔
9
from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
1✔
10
from haystack.utils import deserialize_callable, serialize_callable
1✔
11

12
# Maps Python primitive types to the type names written into "serialization_schema".
# Entry order matters: `_primitive_schema_type` returns the first `isinstance` match, and
# `bool` is a subclass of `int`, so `bool` has to be listed before `int`.
_PRIMITIVE_TO_SCHEMA_MAP = {type(None): "null", bool: "boolean", int: "integer", float: "number", str: "string"}
1✔
13

14

15
def serialize_class_instance(obj: Any) -> dict[str, Any]:
    """
    Wraps the `to_dict` output of an object in a typed envelope.

    :param obj:
        The object to serialize. It must expose a `to_dict` method.
    :returns:
        A dictionary holding the qualified class name of `obj` under `"type"` and the result of
        `obj.to_dict()` under `"data"`.
    :raises SerializationError:
        If the object does not have a `to_dict` method.
    """
    if hasattr(obj, "to_dict"):
        payload = obj.to_dict()
        qualified_name = generate_qualified_class_name(type(obj))
        return {"type": qualified_name, "data": payload}

    raise SerializationError(f"Object of class '{type(obj).__name__}' does not have a 'to_dict' method")
31

32

33
def deserialize_class_instance(data: dict[str, Any]) -> Any:
    """
    Rebuilds an object from the typed envelope produced by `serialize_class_instance`.

    :param data:
        The dictionary to deserialize from. It must contain the qualified class name under
        `"type"` and the input for the class's `from_dict` method under `"data"`.
    :returns:
        The object returned by the `from_dict` method of the imported class.
    :raises DeserializationError:
        If `"type"` or `"data"` is missing, the class cannot be imported, or the class does not
        have a `from_dict` method.
    """
    # Validate the envelope before touching the import machinery.
    if "type" not in data:
        raise DeserializationError("Missing 'type' in serialization data")
    if "data" not in data:
        raise DeserializationError("Missing 'data' in serialization data")

    qualified_name = data["type"]
    try:
        target_class = import_class_by_name(qualified_name)
    except ImportError as e:
        raise DeserializationError(f"Class '{qualified_name}' not correctly imported") from e

    if hasattr(target_class, "from_dict"):
        return target_class.from_dict(data["data"])

    raise DeserializationError(f"Class '{qualified_name}' does not have a 'from_dict' method")
59

60

61
def _serialize_value_with_schema(payload: Any) -> dict[str, Any]:
1✔
62
    """
63
    Serializes a value into a schema-aware format suitable for storage or transmission.
64

65
    The output format separates the schema information from the actual data, making it easier
66
    to deserialize complex nested structures correctly.
67

68
    The function handles:
69
    - Objects with to_dict() methods (e.g. dataclasses)
70
    - Objects with __dict__ attributes
71
    - Dictionaries
72
    - Lists, tuples, and sets. Lists with mixed types are not supported.
73
    - Primitive types (str, int, float, bool, None)
74

75
    :param payload: The value to serialize (can be any type)
76
    :returns: The serialized dict representation of the given value. Contains two keys:
77
        - "serialization_schema": Contains type information for each field.
78
        - "serialized_data": Contains the actual data in a simplified format.
79

80
    """
81
    # Handle dictionary case - iterate through fields
82
    if isinstance(payload, dict):
1✔
83
        schema: dict[str, Any] = {}
1✔
84
        data: dict[str, Any] = {}
1✔
85

86
        for field, val in payload.items():
1✔
87
            # Recursively serialize each field
88
            serialized_value = _serialize_value_with_schema(val)
1✔
89
            schema[field] = serialized_value["serialization_schema"]
1✔
90
            data[field] = serialized_value["serialized_data"]
1✔
91

92
        return {"serialization_schema": {"type": "object", "properties": schema}, "serialized_data": data}
1✔
93

94
    # Handle array case - iterate through elements
95
    elif isinstance(payload, (list, tuple, set)):
1✔
96
        # Serialize each item in the array
97
        serialized_list = []
1✔
98
        for item in payload:
1✔
99
            serialized_value = _serialize_value_with_schema(item)
1✔
100
            serialized_list.append(serialized_value["serialized_data"])
1✔
101

102
        # Determine item type from first element (if any)
103
        if payload:
1✔
104
            first = next(iter(payload))
1✔
105
            item_schema = _serialize_value_with_schema(first)
1✔
106
            base_schema = {"type": "array", "items": item_schema["serialization_schema"]}
1✔
107
        else:
108
            base_schema = {"type": "array", "items": {}}
1✔
109

110
        # Add JSON Schema properties to infer sets and tuples
111
        if isinstance(payload, set):
1✔
112
            base_schema["uniqueItems"] = True
1✔
113
        elif isinstance(payload, tuple):
1✔
114
            base_schema["minItems"] = len(payload)
1✔
115
            base_schema["maxItems"] = len(payload)
1✔
116

117
        return {"serialization_schema": base_schema, "serialized_data": serialized_list}
1✔
118

119
    # Handle Haystack style objects (e.g. dataclasses and Components)
120
    elif hasattr(payload, "to_dict") and callable(payload.to_dict):
1✔
121
        type_name = generate_qualified_class_name(type(payload))
1✔
122
        schema = {"type": type_name}
1✔
123
        return {"serialization_schema": schema, "serialized_data": payload.to_dict()}
1✔
124

125
    # Handle callable functions serialization
126
    elif callable(payload) and not isinstance(payload, type):
1✔
127
        serialized = serialize_callable(payload)
1✔
128
        return {"serialization_schema": {"type": "typing.Callable"}, "serialized_data": serialized}
1✔
129

130
    # Handle Enums
131
    elif isinstance(payload, Enum):
1✔
132
        type_name = generate_qualified_class_name(type(payload))
1✔
133
        return {"serialization_schema": {"type": type_name}, "serialized_data": payload.name}
1✔
134

135
    # Handle arbitrary objects with __dict__
136
    elif hasattr(payload, "__dict__"):
1✔
137
        type_name = generate_qualified_class_name(type(payload))
×
138
        schema = {"type": type_name}
×
139
        serialized_data = {}
×
140
        for key, value in vars(payload).items():
×
141
            serialized_value = _serialize_value_with_schema(value)
×
142
            serialized_data[key] = serialized_value["serialized_data"]
×
143
        return {"serialization_schema": schema, "serialized_data": serialized_data}
×
144

145
    # Handle primitives
146
    else:
147
        schema = {"type": _primitive_schema_type(payload)}
1✔
148
        return {"serialization_schema": schema, "serialized_data": payload}
1✔
149

150

151
def _primitive_schema_type(value: Any) -> str:
    """
    Returns the schema type name for a primitive value.

    The first entry of `_PRIMITIVE_TO_SCHEMA_MAP` whose Python type matches `value` wins. Values
    that match no entry are reported as "string".
    """
    matching_names = (
        schema_name for py_type, schema_name in _PRIMITIVE_TO_SCHEMA_MAP.items() if isinstance(value, py_type)
    )
    return next(matching_names, "string")  # fallback
159

160

161
def _deserialize_value_with_schema(serialized: dict[str, Any]) -> Any:
1✔
162
    """
163
    Deserializes a value with schema information back to its original form.
164

165
    Takes a dict of the form:
166
      {
167
         "serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}},
168
         "serialized_data": <the actual data>
169
      }
170

171
    NOTE: For array types we only support homogeneous lists (all elements of the same type).
172

173
    :param serialized: The serialized dict with schema and data.
174
    :returns: The deserialized value in its original form.
175
    """
176

177
    if not serialized or "serialization_schema" not in serialized or "serialized_data" not in serialized:
1✔
178
        raise DeserializationError(
×
179
            f"Invalid format of passed serialized payload. Expected a dictionary with keys "
180
            f"'serialization_schema' and 'serialized_data'. Got: {serialized}"
181
        )
182
    schema = serialized["serialization_schema"]
1✔
183
    data = serialized["serialized_data"]
1✔
184

185
    schema_type = schema.get("type")
1✔
186

187
    if not schema_type:
1✔
188
        # for backward compatibility till Haystack 2.16 we use legacy implementation
189
        raise DeserializationError(
×
190
            "Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized "
191
            "State object created with a version of Haystack older than 2.15.0. "
192
            "Support for the old serialization format is removed in Haystack 2.16.0. "
193
            "Please upgrade to the new serialization format to ensure forward compatibility."
194
        )
195

196
    # Handle object case (dictionary with properties)
197
    if schema_type == "object":
1✔
198
        properties = schema["properties"]
1✔
199
        result: dict[str, Any] = {}
1✔
200
        for field, raw_value in data.items():
1✔
201
            field_schema = properties[field]
1✔
202
            # Recursively deserialize each field - avoid creating temporary dict
203
            result[field] = _deserialize_value_with_schema(
1✔
204
                {"serialization_schema": field_schema, "serialized_data": raw_value}
205
            )
206
        return result
1✔
207

208
    # Handle array case
209
    if schema_type == "array":
1✔
210
        # Deserialize each item
211
        deserialized_items = [
1✔
212
            _deserialize_value_with_schema({"serialization_schema": schema["items"], "serialized_data": item})
213
            for item in data
214
        ]
215
        final_array: Union[list, set, tuple]
216
        # Is a set if uniqueItems is True
217
        if schema.get("uniqueItems") is True:
1✔
218
            final_array = set(deserialized_items)
1✔
219
        # Is a tuple if minItems and maxItems are set and equal
220
        elif schema.get("minItems") is not None and schema.get("maxItems") is not None:
1✔
221
            final_array = tuple(deserialized_items)
1✔
222
        else:
223
            # Otherwise, it's a list
224
            final_array = list(deserialized_items)
1✔
225
        return final_array
1✔
226

227
    # Handle primitive types
228
    if schema_type in _PRIMITIVE_TO_SCHEMA_MAP.values():
1✔
229
        return data
1✔
230

231
    # Handle callable functions
232
    if schema_type == "typing.Callable":
1✔
233
        return deserialize_callable(data)
1✔
234

235
    # Handle custom class types
236
    return _deserialize_value({"type": schema_type, "data": data})
1✔
237

238

239
def _deserialize_value(value: dict[str, Any]) -> Any:
    """
    Helper function to deserialize values from their envelope format {"type": T, "data": D}.

    This handles:
    - Custom classes (with a from_dict method)
    - Enums (looked up by member name)
    - Fallback for arbitrary classes (sets attributes on a blank instance)

    In the fallback, attribute values that are themselves envelopes (dicts containing both
    "type" and "data") are deserialized recursively; all other attribute values are set as-is.

    :param value: The value to deserialize
    :returns:
        The deserialized value
    :raises DeserializationError:
        If the type cannot be imported or the value is not valid for the type.
    """
    # 1) Envelope case
    value_type = value["type"]
    payload = value["data"]

    # Custom class where value_type is a qualified class name.
    # NOTE(review): the class to import is named by the serialized data itself, so this should
    # presumably only be fed trusted input - confirm against callers.
    try:
        cls = import_class_by_name(value_type)
    except ImportError as e:
        # Surface import failures as the documented error type
        raise DeserializationError(f"Class '{value_type}' not correctly imported") from e

    # try from_dict (e.g. Haystack dataclasses and Components)
    if hasattr(cls, "from_dict") and callable(cls.from_dict):
        return cls.from_dict(payload)

    # handle enum types (issubclass raises TypeError when given something that is not a class)
    if isinstance(cls, type) and issubclass(cls, Enum):
        try:
            return cls[payload]
        except Exception as e:
            raise DeserializationError(f"Value '{payload}' is not a valid member of Enum '{value_type}'") from e

    # fallback: set attributes on a blank instance
    if not isinstance(payload, dict):
        raise DeserializationError(
            f"Cannot restore an instance of '{value_type}': expected a dictionary of attributes, "
            f"got '{type(payload).__name__}'"
        )

    instance = cls.__new__(cls)
    for attr_name, attr_value in payload.items():
        # Only envelopes can be deserialized recursively; raw values (e.g. primitives) have no
        # type information attached and are restored unchanged.
        is_envelope = isinstance(attr_value, dict) and "type" in attr_value and "data" in attr_value
        setattr(instance, attr_name, _deserialize_value(attr_value) if is_envelope else attr_value)
    return instance
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc