• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 15182943497

22 May 2025 09:26AM UTC coverage: 90.284% (-0.2%) from 90.445%
15182943497

Pull #9345

github

web-flow
Merge 8b3c2c41f into e6a53b9dc
Pull Request #9345: feat: add serialization to `State` / move `State` to agents.state

11207 of 12413 relevant lines covered (90.28%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.99
haystack/utils/base_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any, Dict
1✔
6

7
from haystack.core.errors import DeserializationError, SerializationError
1✔
8
from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
1✔
9

10

11
def serialize_class_instance(obj: Any) -> Dict[str, Any]:
    """
    Wraps the `to_dict` output of an object together with the name of its class.

    :param obj:
        The object to serialize. It must expose a `to_dict` attribute.
    :returns:
        A dictionary with two keys: `type`, the name produced by `generate_qualified_class_name`
        for the class of `obj`, and `data`, the value returned by `obj.to_dict()`.
    :raises SerializationError:
        If `obj` has no `to_dict` attribute.
    """
    if hasattr(obj, "to_dict"):
        # `to_dict` is invoked before the class name is resolved, as in the original flow.
        serialized = obj.to_dict()
        qualified_name = generate_qualified_class_name(type(obj))
        return {"type": qualified_name, "data": serialized}

    raise SerializationError(f"Object of class '{type(obj).__name__}' does not have a 'to_dict' method")
27

28

29
def deserialize_class_instance(data: Dict[str, Any]) -> Any:
    """
    Rebuilds an object from the `{"type": ..., "data": ...}` dictionary produced by `serialize_class_instance`.

    :param data:
        The dictionary to deserialize from. `type` holds the class name to import and `data`
        holds the payload handed to that class's `from_dict`.
    :returns:
        Whatever `from_dict` of the imported class returns for `data["data"]`.
    :raises DeserializationError:
        If the `type` or `data` key is missing, the class cannot be imported, or the class
        has no `from_dict` attribute.
    """
    if "type" not in data:
        raise DeserializationError("Missing 'type' in serialization data")
    if "data" not in data:
        raise DeserializationError("Missing 'data' in serialization data")

    try:
        target_class = import_class_by_name(data["type"])
    except ImportError as import_error:
        # Surface import problems as a deserialization failure while keeping the cause chained.
        raise DeserializationError(f"Class '{data['type']}' not correctly imported") from import_error

    if hasattr(target_class, "from_dict"):
        return target_class.from_dict(data["data"])

    raise DeserializationError(f"Class '{data['type']}' does not have a 'from_dict' method")
55

56

57
def serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Serializes a dictionary into a schema-aware format suitable for storage or transmission.

    The output format separates the schema information from the actual data, making it easier
    to deserialize complex nested structures correctly.

    Each top-level field of `payload` is classified as one of:
    - Objects with a callable `to_dict` (e.g. dataclasses) or a `__dict__` attribute: the schema
      type is the qualified class name
    - Dictionaries: schema type "object"
    - Lists, tuples, and sets: schema type "array"; the data is always stored as a list
    - Anything else: the schema type reported by `_primitive_schema_type`

    For arrays, the item type is inferred from the first element only; an empty sequence
    gets the item type "any".

    :param payload: The dictionary whose values are serialized field by field.
    :returns: The serialized dict representation of the given payload. Contains two keys:
        - "serialization_schema": Contains type information for each field
        - "serialized_data": Contains the actual data in a simplified format

    """
    schema: Dict[str, Any] = {}
    data: Dict[str, Any] = {}

    for field, val in payload.items():
        has_to_dict = hasattr(val, "to_dict") and callable(val.to_dict)

        # 1) Custom objects: prefer to_dict(), otherwise fall back to the instance __dict__
        if has_to_dict or hasattr(val, "__dict__"):
            type_name = generate_qualified_class_name(type(val))
            raw = val.to_dict() if has_to_dict else vars(val)
            schema[field] = {"type": type_name}
            data[field] = _convert_to_basic_types(raw)

        # 2) Dicts -> "object"
        elif isinstance(val, dict):
            pure = _convert_to_basic_types(val)
            schema[field] = {"type": "object"}
            data[field] = pure

        # 3) Sequences -> "array"
        elif isinstance(val, (list, tuple, set)):
            # The data is normalised to a list regardless of the original container type
            pure_list = _convert_to_basic_types(list(val))

            # Determine the item type from the first element (if any)
            if val:
                first = next(iter(val))
                first_is_object = (hasattr(first, "to_dict") and callable(first.to_dict)) or hasattr(
                    first, "__dict__"
                )
                if first_is_object:
                    item_type = generate_qualified_class_name(type(first))
                else:
                    item_type = _primitive_schema_type(first)
            else:
                item_type = "any"

            schema[field] = {"type": "array", "items": {"type": item_type}}
            data[field] = pure_list

        # 4) Primitives (and anything unrecognised) are stored untouched
        else:
            schema[field] = {"type": _primitive_schema_type(val)}
            data[field] = val

    return {"serialization_schema": schema, "serialized_data": data}
125

126

127
def _primitive_schema_type(value: Any) -> str:
1✔
128
    """
129
    Helper function to determine the schema type for primitive values.
130
    """
131
    if value is None:
1✔
132
        return "null"
×
133
    if isinstance(value, bool):
1✔
134
        return "boolean"
×
135
    if isinstance(value, int):
1✔
136
        return "integer"
1✔
137
    if isinstance(value, float):
1✔
138
        return "number"
×
139
    if isinstance(value, str):
1✔
140
        return "string"
1✔
141
    return "string"  # fallback
1✔
142

143

144
def _convert_to_basic_types(value: Any) -> Any:
1✔
145
    """
146
    Helper function to recursively convert complex Python objects into their basic type equivalents.
147

148
    This helper function traverses through nested data structures and converts all complex
149
    objects (custom classes, dataclasses, etc.) into basic Python types (dict, list, str,
150
    int, float, bool, None) that can be easily serialized.
151

152
    The function handles:
153
    - Objects with to_dict() methods: converted using their to_dict implementation
154
    - Objects with __dict__ attribute: converted to plain dictionaries
155
    - Dictionaries: recursively converted values while preserving keys
156
    - Sequences (list, tuple, set): recursively converted while preserving type
157
    - Primitive types: returned as-is
158

159
    """
160
    # dataclass‐style objects
161
    if hasattr(value, "to_dict") and callable(value.to_dict):
1✔
162
        return _convert_to_basic_types(value.to_dict())
1✔
163

164
    # arbitrary objects with __dict__
165
    if hasattr(value, "__dict__"):
1✔
166
        return {k: _convert_to_basic_types(v) for k, v in vars(value).items()}
×
167

168
    # dicts
169
    if isinstance(value, dict):
1✔
170
        return {k: _convert_to_basic_types(v) for k, v in value.items()}
1✔
171

172
    # sequences
173
    if isinstance(value, (list, tuple, set)):
1✔
174
        cls = type(value)
1✔
175
        return cls(_convert_to_basic_types(v) for v in value)
1✔
176

177
    # primitive
178
    return value
1✔
179

180

181
def deserialize_value_with_schema(serialized: Dict[str, Any]) -> Dict[str, Any]:
    """
    Deserializes a dictionary with schema information and data to original values.

    Takes a dict of the form:
      {
         "serialization_schema": {
            "numbers": {"type": "integer"},
            "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
         },
         "serialized_data": {
            "numbers": 1,
            "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
         },
      }

    Both top-level keys are optional and default to an empty dict. Fields are driven by
    "serialized_data"; a field without a (non-empty) schema entry is deserialized without
    type information. Arrays are always returned as lists.

    :param serialized: The serialized dict with schema and data.
    :returns: The deserialized dict with original values.
    """
    schema = serialized.get("serialization_schema", {})
    data = serialized.get("serialized_data", {})

    result: Dict[str, Any] = {}
    for field, raw in data.items():
        info = schema.get(field)
        # no schema entry -> just deep-deserialize whatever we have
        if not info:
            result[field] = _deserialize_value(raw)
            continue

        t = info["type"]

        # ARRAY case: every item is wrapped in an envelope carrying the declared item type
        if t == "array":
            item_type = info["items"]["type"]
            result[field] = [_deserialize_value({"type": item_type, "data": item}) for item in raw]

        # PRIMITIVE case: stored verbatim, nothing to rebuild
        elif t in ("null", "boolean", "integer", "number", "string"):
            result[field] = raw

        # GENERIC OBJECT ("object") and CUSTOM CLASS (qualified class name) share one path:
        # the envelope's type tells `_deserialize_value` how to rebuild the value
        else:
            result[field] = _deserialize_value({"type": t, "data": raw})

    return result
236

237

238
def _deserialize_value(value: Any) -> Any:  # pylint: disable=too-many-return-statements # noqa: PLR0911
1✔
239
    """
240
    Helper function to deserialize values from their envelope format {"type": T, "data": D}.
241

242
    Handles four cases:
243
    - Typed envelopes: {"type": T, "data": D} where T determines deserialization method
244
    - Plain dicts: recursively deserialize values
245
    - Collections (list/tuple/set): recursively deserialize elements
246
    - Other values: return as-is
247

248
    :param value: The value to deserialize
249
    :returns: The deserialized value
250

251
    """
252
    # 1) Envelope case
253
    if isinstance(value, dict) and "type" in value and "data" in value:
1✔
254
        t = value["type"]
1✔
255
        payload = value["data"]
1✔
256

257
        # 1.a) Array
258
        if t == "array":
1✔
259
            return [_deserialize_value(child) for child in payload]
×
260

261
        # 1.b) Generic object/dict
262
        if t == "object":
1✔
263
            return {k: _deserialize_value(v) for k, v in payload.items()}
1✔
264

265
        # 1.c) Primitive
266
        if t in ("null", "boolean", "integer", "number", "string"):
1✔
267
            return payload
1✔
268

269
        # 1.d) Custom class
270
        cls = import_class_by_name(t)
1✔
271
        # first, recursively deserialize the inner payload
272
        deserialized_payload = {k: _deserialize_value(v) for k, v in payload.items()}
1✔
273
        # try from_dict
274
        if hasattr(cls, "from_dict") and callable(cls.from_dict):
1✔
275
            return cls.from_dict(deserialized_payload)
1✔
276
        # fallback: set attributes on a blank instance
277
        instance = cls.__new__(cls)
×
278
        for attr_name, attr_value in deserialized_payload.items():
×
279
            setattr(instance, attr_name, attr_value)
×
280
        return instance
×
281

282
    # 2) Plain dict (no envelope) → recurse
283
    if isinstance(value, dict):
1✔
284
        return {k: _deserialize_value(v) for k, v in value.items()}
1✔
285

286
    # 3) Collections → recurse
287
    if isinstance(value, (list, tuple, set)):
1✔
288
        return type(value)(_deserialize_value(v) for v in value)
1✔
289

290
    # 4) Fallback (shouldn't usually happen with our schema)
291
    return value
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc