• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 15254528833

26 May 2025 12:56PM UTC coverage: 90.146% (-0.3%) from 90.411%
15254528833

Pull #9426

github

web-flow
Merge 06c2b66b1 into 802328e29
Pull Request #9426: feat: add component name and type to `StreamingChunk`

11398 of 12644 relevant lines covered (90.15%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.99
haystack/utils/base_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any, Dict
1✔
6

7
from haystack.core.errors import DeserializationError, SerializationError
1✔
8
from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
1✔
9

10

11
def serialize_class_instance(obj: Any) -> Dict[str, Any]:
    """
    Wraps the `to_dict` output of an object in a dictionary that also records its class.

    :param obj:
        The object to serialize. It must expose a `to_dict` attribute.
    :returns:
        A dictionary with two keys: `type`, the result of `generate_qualified_class_name`
        for the class of `obj`, and `data`, the value returned by `obj.to_dict()`.
    :raises SerializationError:
        If the object does not have a `to_dict` method.
    """
    obj_class = type(obj)
    if not hasattr(obj, "to_dict"):
        raise SerializationError(f"Object of class '{obj_class.__name__}' does not have a 'to_dict' method")

    serialized = obj.to_dict()
    return {"type": generate_qualified_class_name(obj_class), "data": serialized}
27

28

29
def deserialize_class_instance(data: Dict[str, Any]) -> Any:
    """
    Rebuilds an object from the dictionary representation produced by `serialize_class_instance`.

    :param data:
        The dictionary to deserialize from. It must contain a `type` key holding the name
        of the class to import and a `data` key holding the payload passed to `from_dict`.
    :returns:
        The object returned by the `from_dict` method of the imported class.
    :raises DeserializationError:
        If the `type` or `data` key is missing, the class cannot be imported, or the
        class does not have a `from_dict` method.
    """
    # Validate the envelope first; `type` is checked before `data`.
    for required_key in ("type", "data"):
        if required_key not in data:
            raise DeserializationError(f"Missing '{required_key}' in serialization data")

    class_name = data["type"]
    try:
        target_class = import_class_by_name(class_name)
    except ImportError as e:
        raise DeserializationError(f"Class '{class_name}' not correctly imported") from e

    if not hasattr(target_class, "from_dict"):
        raise DeserializationError(f"Class '{class_name}' does not have a 'from_dict' method")

    return target_class.from_dict(data["data"])
55

56

57
# TODO: Make this function public once its implementation is finalized and tested
58
def _serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Serializes a dictionary into a schema-aware format suitable for storage or transmission.

    The output format separates the schema information from the actual data, making it easier
    to deserialize complex nested structures correctly.

    Each top-level value of `payload` is classified in this order:
    - Objects with a callable `to_dict()` method
    - Dictionaries, including `dict` subclasses (schema type "object")
    - Lists, tuples, and sets (schema type "array", data stored as a list)
    - Other objects with a `__dict__` attribute
    - Everything else, described by `_primitive_schema_type`

    :param payload: The dictionary whose values should be serialized.
    :returns: The serialized dict representation of the given payload. Contains two keys:
        - "serialization_schema": Maps each field name to its type information
        - "serialized_data": Maps each field name to its converted value
    """
    schema: Dict[str, Any] = {}
    data: Dict[str, Any] = {}

    for field, val in payload.items():
        # 1) Objects exposing to_dict()
        if hasattr(val, "to_dict") and callable(val.to_dict):
            schema[field] = {"type": generate_qualified_class_name(type(val))}
            data[field] = _convert_to_basic_types(val.to_dict())

        # 2) Dicts -> "object"
        # Containers are checked before the __dict__ fallback: subclasses such as
        # OrderedDict also expose a __dict__, and vars() on them would drop their items.
        elif isinstance(val, dict):
            schema[field] = {"type": "object"}
            # Copy into a plain dict so the conversion always walks the items.
            data[field] = _convert_to_basic_types(dict(val))

        # 3) Sequences -> "array"
        elif isinstance(val, (list, tuple, set)):
            items = list(val)
            # The item type is determined from the first element only (if any).
            if items:
                first = items[0]
                has_to_dict = hasattr(first, "to_dict") and callable(first.to_dict)
                is_plain_object = hasattr(first, "__dict__") and not isinstance(first, (dict, list, tuple, set))
                if has_to_dict or is_plain_object:
                    item_type = generate_qualified_class_name(type(first))
                else:
                    item_type = _primitive_schema_type(first)
            else:
                item_type = "any"

            schema[field] = {"type": "array", "items": {"type": item_type}}
            data[field] = _convert_to_basic_types(items)

        # 4) Arbitrary objects with __dict__
        elif hasattr(val, "__dict__"):
            schema[field] = {"type": generate_qualified_class_name(type(val))}
            data[field] = _convert_to_basic_types(vars(val))

        # 5) Primitives
        else:
            schema[field] = {"type": _primitive_schema_type(val)}
            data[field] = val

    return {"serialization_schema": schema, "serialized_data": data}
126

127

128
def _primitive_schema_type(value: Any) -> str:
1✔
129
    """
130
    Helper function to determine the schema type for primitive values.
131
    """
132
    if value is None:
1✔
133
        return "null"
×
134
    if isinstance(value, bool):
1✔
135
        return "boolean"
×
136
    if isinstance(value, int):
1✔
137
        return "integer"
1✔
138
    if isinstance(value, float):
1✔
139
        return "number"
×
140
    if isinstance(value, str):
1✔
141
        return "string"
1✔
142
    return "string"  # fallback
1✔
143

144

145
def _convert_to_basic_types(value: Any) -> Any:
1✔
146
    """
147
    Helper function to recursively convert complex Python objects into their basic type equivalents.
148

149
    This helper function traverses through nested data structures and converts all complex
150
    objects (custom classes, dataclasses, etc.) into basic Python types (dict, list, str,
151
    int, float, bool, None) that can be easily serialized.
152

153
    The function handles:
154
    - Objects with to_dict() methods: converted using their to_dict implementation
155
    - Objects with __dict__ attribute: converted to plain dictionaries
156
    - Dictionaries: recursively converted values while preserving keys
157
    - Sequences (list, tuple, set): recursively converted while preserving type
158
    - Primitive types: returned as-is
159

160
    """
161
    # dataclass‐style objects
162
    if hasattr(value, "to_dict") and callable(value.to_dict):
1✔
163
        return _convert_to_basic_types(value.to_dict())
1✔
164

165
    # arbitrary objects with __dict__
166
    if hasattr(value, "__dict__"):
1✔
167
        return {k: _convert_to_basic_types(v) for k, v in vars(value).items()}
×
168

169
    # dicts
170
    if isinstance(value, dict):
1✔
171
        return {k: _convert_to_basic_types(v) for k, v in value.items()}
1✔
172

173
    # sequences
174
    if isinstance(value, (list, tuple, set)):
1✔
175
        cls = type(value)
1✔
176
        return cls(_convert_to_basic_types(v) for v in value)
1✔
177

178
    # primitive
179
    return value
1✔
180

181

182
# TODO: Make this function public once its implementation is finalized and tested
183
def _deserialize_value_with_schema(serialized: Dict[str, Any]) -> Dict[str, Any]:
    """
    Deserializes a dictionary with schema information and data to original values.

    Takes a dict of the form:
      {
        "serialization_schema": {
            "numbers": {"type": "integer"},
            "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
        },
        "serialized_data": {
            "numbers": 1,
            "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
        },
      }

    A missing "serialization_schema" or "serialized_data" key is treated as an empty dict.

    :param serialized: The serialized dict with schema and data.
    :returns: The deserialized dict with original values, keyed by the field names found in
        "serialized_data".
    """
    schema = serialized.get("serialization_schema", {})
    data = serialized.get("serialized_data", {})

    result: Dict[str, Any] = {}
    for field, raw in data.items():
        info = schema.get(field)
        # no schema entry -> just deep-deserialize whatever we have
        if not info:
            result[field] = _deserialize_value(raw)
            continue

        field_type = info["type"]

        # ARRAY case: every item is deserialized with the declared item type
        if field_type == "array":
            item_type = info["items"]["type"]
            result[field] = [_deserialize_value({"type": item_type, "data": item}) for item in raw]

        # PRIMITIVE case: stored values are used unchanged
        elif field_type in ("null", "boolean", "integer", "number", "string"):
            result[field] = raw

        # GENERIC OBJECT ("object") or CUSTOM CLASS: both are wrapped in the same
        # envelope and dispatched on its "type" by _deserialize_value
        else:
            result[field] = _deserialize_value({"type": field_type, "data": raw})

    return result
238

239

240
def _deserialize_value(value: Any) -> Any:  # pylint: disable=too-many-return-statements # noqa: PLR0911
1✔
241
    """
242
    Helper function to deserialize values from their envelope format {"type": T, "data": D}.
243

244
    Handles four cases:
245
    - Typed envelopes: {"type": T, "data": D} where T determines deserialization method
246
    - Plain dicts: recursively deserialize values
247
    - Collections (list/tuple/set): recursively deserialize elements
248
    - Other values: return as-is
249

250
    :param value: The value to deserialize
251
    :returns: The deserialized value
252

253
    """
254
    # 1) Envelope case
255
    if isinstance(value, dict) and "type" in value and "data" in value:
1✔
256
        t = value["type"]
1✔
257
        payload = value["data"]
1✔
258

259
        # 1.a) Array
260
        if t == "array":
1✔
261
            return [_deserialize_value(child) for child in payload]
×
262

263
        # 1.b) Generic object/dict
264
        if t == "object":
1✔
265
            return {k: _deserialize_value(v) for k, v in payload.items()}
1✔
266

267
        # 1.c) Primitive
268
        if t in ("null", "boolean", "integer", "number", "string"):
1✔
269
            return payload
1✔
270

271
        # 1.d) Custom class
272
        cls = import_class_by_name(t)
1✔
273
        # first, recursively deserialize the inner payload
274
        deserialized_payload = {k: _deserialize_value(v) for k, v in payload.items()}
1✔
275
        # try from_dict
276
        if hasattr(cls, "from_dict") and callable(cls.from_dict):
1✔
277
            return cls.from_dict(deserialized_payload)
1✔
278
        # fallback: set attributes on a blank instance
279
        instance = cls.__new__(cls)
×
280
        for attr_name, attr_value in deserialized_payload.items():
×
281
            setattr(instance, attr_name, attr_value)
×
282
        return instance
×
283

284
    # 2) Plain dict (no envelope) → recurse
285
    if isinstance(value, dict):
1✔
286
        return {k: _deserialize_value(v) for k, v in value.items()}
1✔
287

288
    # 3) Collections → recurse
289
    if isinstance(value, (list, tuple, set)):
1✔
290
        return type(value)(_deserialize_value(v) for v in value)
1✔
291

292
    # 4) Fallback (shouldn't usually happen with our schema)
293
    return value
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc