• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 18493209102

14 Oct 2025 10:15AM UTC coverage: 92.032% (-0.03%) from 92.061%
18493209102

Pull #9869

github

web-flow
Merge 61a015d2b into 18b6482e2
Pull Request #9869: feat: Add serialization and deserialization of Enum type when creating a `PipelineSnapshot`

13201 of 14344 relevant lines covered (92.03%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.67
haystack/utils/base_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from enum import Enum
1✔
6
from typing import Any, Union
1✔
7

8
from haystack import logging
1✔
9
from haystack.core.errors import DeserializationError, SerializationError
1✔
10
from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
1✔
11
from haystack.utils import deserialize_callable, serialize_callable
1✔
12

13
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
1✔
14

15
_PRIMITIVE_TO_SCHEMA_MAP = {type(None): "null", bool: "boolean", int: "integer", float: "number", str: "string"}
1✔
16

17

18
def serialize_class_instance(obj: Any) -> dict[str, Any]:
    """
    Wraps the `to_dict` output of an object together with the name of its class.

    :param obj:
        The object to serialize. It must expose a `to_dict` attribute.
    :returns:
        A dictionary with two keys: `type`, the name produced by `generate_qualified_class_name`
        for the class of `obj`, and `data`, the value returned by `obj.to_dict()`.
    :raises SerializationError:
        If `obj` has no `to_dict` attribute.
    """
    if hasattr(obj, "to_dict"):
        payload = obj.to_dict()
        return {"type": generate_qualified_class_name(type(obj)), "data": payload}

    raise SerializationError(f"Object of class '{type(obj).__name__}' does not have a 'to_dict' method")
1✔
34

35

36
def deserialize_class_instance(data: dict[str, Any]) -> Any:
    """
    Rebuilds an object from the `{"type": ..., "data": ...}` dictionary produced by `serialize_class_instance`.

    :param data:
        The dictionary to deserialize from. It must contain the keys `type` (class name to import)
        and `data` (the value handed to the class's `from_dict`).
    :returns:
        The object returned by the `from_dict` method of the imported class.
    :raises DeserializationError:
        If `type` or `data` is missing, the class cannot be imported, or the class has no
        `from_dict` attribute.
    """
    if "type" not in data:
        raise DeserializationError("Missing 'type' in serialization data")
    if "data" not in data:
        raise DeserializationError("Missing 'data' in serialization data")

    type_name = data["type"]
    try:
        target_cls = import_class_by_name(type_name)
    except ImportError as err:
        raise DeserializationError(f"Class '{type_name}' not correctly imported") from err

    if hasattr(target_cls, "from_dict"):
        return target_cls.from_dict(data["data"])

    raise DeserializationError(f"Class '{type_name}' does not have a 'from_dict' method")
1✔
62

63

64
def _serialize_value_with_schema(payload: Any) -> dict[str, Any]:  # pylint: disable=too-many-return-statements
1✔
65
    """
66
    Serializes a value into a schema-aware format suitable for storage or transmission.
67

68
    The output format separates the schema information from the actual data, making it easier
69
    to deserialize complex nested structures correctly.
70

71
    The function handles:
72
    - Objects with to_dict() methods (e.g. dataclasses)
73
    - Objects with __dict__ attributes
74
    - Dictionaries
75
    - Lists, tuples, and sets. Lists with mixed types are not supported.
76
    - Primitive types (str, int, float, bool, None)
77

78
    :param payload: The value to serialize (can be any type)
79
    :returns: The serialized dict representation of the given value. Contains two keys:
80
        - "serialization_schema": Contains type information for each field.
81
        - "serialized_data": Contains the actual data in a simplified format.
82

83
    """
84
    # Handle dictionary case - iterate through fields
85
    if isinstance(payload, dict):
1✔
86
        schema: dict[str, Any] = {}
1✔
87
        data: dict[str, Any] = {}
1✔
88

89
        for field, val in payload.items():
1✔
90
            # Recursively serialize each field
91
            serialized_value = _serialize_value_with_schema(val)
1✔
92
            schema[field] = serialized_value["serialization_schema"]
1✔
93
            data[field] = serialized_value["serialized_data"]
1✔
94

95
        return {"serialization_schema": {"type": "object", "properties": schema}, "serialized_data": data}
1✔
96

97
    # Handle array case - iterate through elements
98
    elif isinstance(payload, (list, tuple, set)):
1✔
99
        # Serialize each item in the array
100
        serialized_list = []
1✔
101
        for item in payload:
1✔
102
            serialized_value = _serialize_value_with_schema(item)
1✔
103
            serialized_list.append(serialized_value["serialized_data"])
1✔
104

105
        # Determine item type from first element (if any)
106
        # NOTE: We do not support mixed-type lists
107
        if payload:
1✔
108
            first = next(iter(payload))
1✔
109
            item_schema = _serialize_value_with_schema(first)
1✔
110
            base_schema = {"type": "array", "items": item_schema["serialization_schema"]}
1✔
111
        else:
112
            base_schema = {"type": "array", "items": {}}
1✔
113

114
        # Add JSON Schema properties to infer sets and tuples
115
        if isinstance(payload, set):
1✔
116
            base_schema["uniqueItems"] = True
1✔
117
        elif isinstance(payload, tuple):
1✔
118
            base_schema["minItems"] = len(payload)
1✔
119
            base_schema["maxItems"] = len(payload)
1✔
120

121
        return {"serialization_schema": base_schema, "serialized_data": serialized_list}
1✔
122

123
    # Handle Haystack style objects (e.g. dataclasses and Components)
124
    elif hasattr(payload, "to_dict") and callable(payload.to_dict):
1✔
125
        type_name = generate_qualified_class_name(type(payload))
1✔
126
        schema = {"type": type_name}
1✔
127
        return {"serialization_schema": schema, "serialized_data": payload.to_dict()}
1✔
128

129
    # Handle callable functions serialization
130
    elif callable(payload) and not isinstance(payload, type):
1✔
131
        serialized = serialize_callable(payload)
1✔
132
        return {"serialization_schema": {"type": "typing.Callable"}, "serialized_data": serialized}
1✔
133

134
    # Handle Enums
135
    elif isinstance(payload, Enum):
1✔
136
        type_name = generate_qualified_class_name(type(payload))
1✔
137
        return {"serialization_schema": {"type": type_name}, "serialized_data": payload.name}
1✔
138

139
    # Handle arbitrary objects with __dict__
140
    elif hasattr(payload, "__dict__"):
1✔
141
        type_name = generate_qualified_class_name(type(payload))
×
142
        schema = {"type": type_name}
×
143
        serialized_data = {}
×
144
        for key, value in vars(payload).items():
×
145
            serialized_value = _serialize_value_with_schema(value)
×
146
            serialized_data[key] = serialized_value["serialized_data"]
×
147
        return {"serialization_schema": schema, "serialized_data": serialized_data}
×
148

149
    # Handle primitives
150
    else:
151
        schema = {"type": _primitive_schema_type(payload)}
1✔
152
        return {"serialization_schema": schema, "serialized_data": payload}
1✔
153

154

155
def _primitive_schema_type(value: Any) -> str:
    """
    Looks up the schema type name for a primitive value.

    Returns the name of the first entry in `_PRIMITIVE_TO_SCHEMA_MAP` whose Python type matches
    `value`. If nothing matches, a warning is logged and "string" is returned.
    """
    matching_names = (
        schema_name for py_type, schema_name in _PRIMITIVE_TO_SCHEMA_MAP.items() if isinstance(value, py_type)
    )
    found = next(matching_names, None)
    if found is not None:
        return found

    logger.warning(
        "Unsupported primitive type '{value_type}', falling back to 'string'", value_type=type(value).__name__
    )
    return "string"  # fallback
×
166

167

168
def _deserialize_value_with_schema(serialized: dict[str, Any]) -> Any:
    """
    Restores a value from its schema-aware serialized form.

    Expects a dict of the form:
      {
         "serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}},
         "serialized_data": <the actual data>
      }

    NOTE: For array types we only support homogeneous lists (all elements of the same type),
    because a single "items" schema is applied to every element.

    :param serialized:
        The serialized dict with schema and data.
    :returns:
        The deserialized value in its original form.
    :raises DeserializationError:
        If `serialized` is empty, lacks one of the two expected keys, or its schema has no "type".
    """
    is_well_formed = bool(serialized) and "serialization_schema" in serialized and "serialized_data" in serialized
    if not is_well_formed:
        raise DeserializationError(
            f"Invalid format of passed serialized payload. Expected a dictionary with keys "
            f"'serialization_schema' and 'serialized_data'. Got: {serialized}"
        )

    value_schema = serialized["serialization_schema"]
    raw = serialized["serialized_data"]
    kind = value_schema.get("type")

    if not kind:
        # A schema without "type" is treated as the unsupported legacy format and rejected
        raise DeserializationError(
            "Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized "
            "State object created with a version of Haystack older than 2.15.0. "
            "Support for the old serialization format is removed in Haystack 2.16.0. "
            "Please upgrade to the new serialization format to ensure forward compatibility."
        )

    # Dictionaries: each field is restored with its own schema from "properties"
    if kind == "object":
        field_schemas = value_schema["properties"]
        return {
            name: _deserialize_value_with_schema(
                {"serialization_schema": field_schemas[name], "serialized_data": item}
            )
            for name, item in raw.items()
        }

    # Arrays: every element is restored with the shared "items" schema
    if kind == "array":
        elements = [
            _deserialize_value_with_schema({"serialization_schema": value_schema["items"], "serialized_data": item})
            for item in raw
        ]
        # uniqueItems marks a set
        if value_schema.get("uniqueItems") is True:
            return set(elements)
        # minItems together with maxItems marks a tuple
        if value_schema.get("minItems") is not None and value_schema.get("maxItems") is not None:
            return tuple(elements)
        # anything else is a plain list
        return elements

    # Primitives are stored as-is
    if kind in _PRIMITIVE_TO_SCHEMA_MAP.values():
        return raw

    # Callables
    if kind == "typing.Callable":
        return deserialize_callable(raw)

    # Everything else is treated as a class name
    return _deserialize_value({"type": kind, "data": raw})
1✔
244

245

246
def _deserialize_value(value: dict[str, Any]) -> Any:
    """
    Helper function to deserialize values from their envelope format {"type": T, "data": D}.

    This handles, in this order:
    - Custom classes (with a callable `from_dict` attribute)
    - Enums (the data is looked up as a member name)
    - Fallback for arbitrary classes (sets attributes on a blank instance, bypassing `__init__`)

    In the fallback, attribute values that are themselves {"type": T, "data": D} envelopes are
    deserialized recursively. Any other attribute value is assigned unchanged, because
    `_serialize_value_with_schema` stores the attributes of plain objects without an envelope.

    :param value:
        The envelope to deserialize. Must contain the keys "type" and "data".
    :returns:
        The deserialized value.
    :raises DeserializationError:
        If the data is not a valid member name of the Enum type, or if the fallback is reached
        with data that is not a dictionary of attributes. Errors raised by `import_class_by_name`
        are not wrapped and propagate to the caller.
    """
    value_type = value["type"]
    payload = value["data"]

    # value_type is expected to be a class name that can be imported
    cls = import_class_by_name(value_type)

    # try from_dict (e.g. Haystack dataclasses and Components)
    if hasattr(cls, "from_dict") and callable(cls.from_dict):
        return cls.from_dict(payload)

    # handle enum types
    if issubclass(cls, Enum):
        try:
            return cls[payload]
        except Exception as e:
            raise DeserializationError(f"Value '{payload}' is not a valid member of Enum '{value_type}'") from e

    # fallback: set attributes on a blank instance
    if not isinstance(payload, dict):
        raise DeserializationError(
            f"Cannot deserialize an instance of '{value_type}': expected a dictionary of attributes, "
            f"got '{type(payload).__name__}'"
        )

    deserialized_payload = {}
    for attr_name, attr_value in payload.items():
        is_envelope = isinstance(attr_value, dict) and "type" in attr_value and "data" in attr_value
        # Only envelopes carry type information; raw values are kept as they were stored.
        deserialized_payload[attr_name] = _deserialize_value(attr_value) if is_envelope else attr_value

    instance = cls.__new__(cls)
    for attr_name, attr_value in deserialized_payload.items():
        setattr(instance, attr_name, attr_value)
    return instance
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc