• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 13972131258

20 Mar 2025 02:43PM UTC coverage: 90.021% (-0.03%) from 90.054%
13972131258

Pull #9069

github

web-flow
Merge 8371761b0 into 67ab3788e
Pull Request #9069: refactor!: `ChatMessage` serialization-deserialization updates

9833 of 10923 relevant lines covered (90.02%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.56
haystack/dataclasses/chat_message.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import inspect
1✔
6
import json
1✔
7
from dataclasses import asdict, dataclass, field
1✔
8
from enum import Enum
1✔
9
from typing import Any, Dict, List, Optional, Sequence, Union
1✔
10

11
from haystack import logging
1✔
12

13
# Module-level logger; `ChatMessage.to_dict` uses it to emit its serialization-format warning.
logger = logging.getLogger(__name__)
1✔
14

15

16
# Keyword names of the removed `ChatMessage` init parameters. `ChatMessage.__new__` raises a
# `TypeError` when any of them is passed as a keyword argument.
LEGACY_INIT_PARAMETERS = {"role", "content", "meta", "name"}
1✔
17

18

19
class ChatRole(str, Enum):
    """
    Enumeration representing the roles within a chat.

    Members also inherit from `str`, so each member compares equal to its plain string value.
    """

    #: The user role. A message from the user contains only text.
    USER = "user"

    #: The system role. A message from the system contains only text.
    SYSTEM = "system"

    #: The assistant role. A message from the assistant can contain text and Tool calls. It can also store metadata.
    ASSISTANT = "assistant"

    #: The tool role. A message from a tool contains the result of a Tool invocation.
    TOOL = "tool"

    @staticmethod
    def from_str(string: str) -> "ChatRole":
        """
        Convert a string to a ChatRole enum.

        :param string: The role value to look up, for example `"user"`.
        :returns: The `ChatRole` member whose value equals `string`.
        :raises ValueError: If `string` is not the value of any `ChatRole` member.
        """
        try:
            # Enum already supports lookup by value, so there is no need to rebuild a
            # value -> member mapping on every call.
            return ChatRole(string)
        except ValueError:
            supported_roles = [role.value for role in ChatRole]
            msg = f"Unknown chat role '{string}'. Supported roles are: {supported_roles}"
            # `from None` hides the generic Enum lookup error; only the descriptive message is useful.
            raise ValueError(msg) from None
47

48

49
@dataclass
class ToolCall:
    """
    Represents a Tool call prepared by the model, usually contained in an assistant message.

    :param tool_name: The name of the Tool to call.
    :param arguments: The arguments to call the Tool with.
    :param id: The ID of the Tool call. Optional; defaults to `None`.
    """

    tool_name: str
    arguments: Dict[str, Any]
    # Declared last because it is the only field with a default value.
    id: Optional[str] = None  # noqa: A003
62

63

64
@dataclass
class ToolCallResult:
    """
    Represents the result of a Tool invocation.

    All three fields are required; none has a default value.

    :param result: The result of the Tool invocation.
    :param origin: The Tool call that produced this result.
    :param error: Whether the Tool invocation resulted in an error.
    """

    result: str
    origin: ToolCall
    error: bool
77

78

79
@dataclass
class TextContent:
    """
    The textual content of a chat message.

    :param text: The text content of the message.
    """

    text: str
88

89

90
# Union of the part types that may appear in the content of a ChatMessage.
ChatMessageContentT = Union[TextContent, ToolCall, ToolCallResult]
1✔
91

92

93
def _deserialize_content(serialized_content: List[Dict[str, Any]]) -> List[ChatMessageContentT]:
    """
    Deserialize the `content` field of a serialized ChatMessage.

    Each part is recognised by the key it contains, checked in this order: `text`, `tool_call`,
    `tool_call_result`.

    :param serialized_content:
        The `content` field of a serialized ChatMessage (a list of dictionaries).

    :returns:
        Deserialized `content` field as a list of `ChatMessageContentT` objects.

    :raises ValueError:
        If a part contains none of the recognised keys.
    :raises KeyError:
        If a `tool_call_result` part lacks `result`, `origin`, or `error`.
    """
    content: List[ChatMessageContentT] = []

    for part in serialized_content:
        if "text" in part:
            content.append(TextContent(text=part["text"]))
        elif "tool_call" in part:
            content.append(ToolCall(**part["tool_call"]))
        elif "tool_call_result" in part:
            serialized_result = part["tool_call_result"]
            # The nested `origin` dictionary is rebuilt into a ToolCall before wrapping it.
            content.append(
                ToolCallResult(
                    result=serialized_result["result"],
                    origin=ToolCall(**serialized_result["origin"]),
                    error=serialized_result["error"],
                )
            )
        else:
            raise ValueError(f"Unsupported part in serialized ChatMessage: `{part}`")

    return content
120

121

122
@dataclass
class ChatMessage:
    """
    Represents a message in a LLM chat conversation.

    Use the `from_assistant`, `from_user`, `from_system`, and `from_tool` class methods to create a ChatMessage.
    """

    _role: ChatRole
    _content: Sequence[ChatMessageContentT]
    _name: Optional[str] = None
    _meta: Dict[str, Any] = field(default_factory=dict, hash=False)

    def __new__(cls, *args, **kwargs):
        """
        This method is reimplemented to make the changes to the `ChatMessage` dataclass more visible.

        :raises TypeError:
            If any of the removed `role`, `content`, `meta`, or `name` keyword arguments is passed, or if a
            second positional argument is given that is not a `TextContent`, `ToolCall`, or `ToolCallResult`.
        """

        general_msg = (
            "Use the `from_assistant`, `from_user`, `from_system`, and `from_tool` class methods to create a "
            "ChatMessage. For more information about the new API and how to migrate, see the documentation:"
            " https://docs.haystack.deepset.ai/docs/chatmessage"
        )

        if any(param in kwargs for param in LEGACY_INIT_PARAMETERS):
            raise TypeError(
                "The `role`, `content`, `meta`, and `name` init parameters of `ChatMessage` have been removed. "
                f"{general_msg}"
            )

        allowed_content_types = (TextContent, ToolCall, ToolCallResult)
        # NOTE(review): the check is applied to the second positional argument itself, so a positional
        # *list* of valid parts is rejected as well. Left unchanged here; confirm whether that is intended.
        if len(args) > 1 and not isinstance(args[1], allowed_content_types):
            raise TypeError(
                "The `_content` parameter of `ChatMessage` must be one of the following types: "
                f"{', '.join(t.__name__ for t in allowed_content_types)}. "
                f"{general_msg}"
            )

        return super(ChatMessage, cls).__new__(cls)

    def __getattribute__(self, name):
        """
        This method is reimplemented to make the `content` attribute removal more visible.

        :raises AttributeError: If `name` is `"content"`.
        """

        if name == "content":
            msg = (
                "The `content` attribute of `ChatMessage` has been removed. "
                "Use the `text` property to access the textual value. "
                "For more information about the new API and how to migrate, see the documentation: "
                "https://docs.haystack.deepset.ai/docs/chatmessage"
            )
            raise AttributeError(msg)
        return object.__getattribute__(self, name)

    def __len__(self):
        """
        Returns the number of content parts in the message.
        """
        return len(self._content)

    @property
    def role(self) -> ChatRole:
        """
        Returns the role of the entity sending the message.
        """
        return self._role

    @property
    def meta(self) -> Dict[str, Any]:
        """
        Returns the metadata associated with the message.
        """
        return self._meta

    @property
    def name(self) -> Optional[str]:
        """
        Returns the name associated with the message.
        """
        return self._name

    @property
    def texts(self) -> List[str]:
        """
        Returns the list of all texts contained in the message.
        """
        return [content.text for content in self._content if isinstance(content, TextContent)]

    @property
    def text(self) -> Optional[str]:
        """
        Returns the first text contained in the message.
        """
        if texts := self.texts:
            return texts[0]
        return None

    @property
    def tool_calls(self) -> List[ToolCall]:
        """
        Returns the list of all Tool calls contained in the message.
        """
        return [content for content in self._content if isinstance(content, ToolCall)]

    @property
    def tool_call(self) -> Optional[ToolCall]:
        """
        Returns the first Tool call contained in the message.
        """
        if tool_calls := self.tool_calls:
            return tool_calls[0]
        return None

    @property
    def tool_call_results(self) -> List[ToolCallResult]:
        """
        Returns the list of all Tool call results contained in the message.
        """
        return [content for content in self._content if isinstance(content, ToolCallResult)]

    @property
    def tool_call_result(self) -> Optional[ToolCallResult]:
        """
        Returns the first Tool call result contained in the message.
        """
        if tool_call_results := self.tool_call_results:
            return tool_call_results[0]
        return None

    def is_from(self, role: Union[ChatRole, str]) -> bool:
        """
        Check if the message is from a specific role.

        :param role: The role to check against.
        :returns: True if the message is from the specified role, False otherwise.
        :raises ValueError: If `role` is a string that does not match any `ChatRole` value.
        """
        if isinstance(role, str):
            role = ChatRole.from_str(role)
        return self._role == role

    @classmethod
    def from_user(cls, text: str, meta: Optional[Dict[str, Any]] = None, name: Optional[str] = None) -> "ChatMessage":
        """
        Create a message from the user.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :returns: A new ChatMessage instance.
        """
        return cls(_role=ChatRole.USER, _content=[TextContent(text=text)], _meta=meta or {}, _name=name)

    @classmethod
    def from_system(cls, text: str, meta: Optional[Dict[str, Any]] = None, name: Optional[str] = None) -> "ChatMessage":
        """
        Create a message from the system.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :returns: A new ChatMessage instance.
        """
        return cls(_role=ChatRole.SYSTEM, _content=[TextContent(text=text)], _meta=meta or {}, _name=name)

    @classmethod
    def from_assistant(
        cls,
        text: Optional[str] = None,
        meta: Optional[Dict[str, Any]] = None,
        name: Optional[str] = None,
        tool_calls: Optional[List[ToolCall]] = None,
    ) -> "ChatMessage":
        """
        Create a message from the assistant.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param tool_calls: The Tool calls to include in the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :returns: A new ChatMessage instance.
        """
        content: List[ChatMessageContentT] = []
        # An empty string is still stored as text; only `None` means "no text part".
        if text is not None:
            content.append(TextContent(text=text))
        if tool_calls:
            content.extend(tool_calls)

        return cls(_role=ChatRole.ASSISTANT, _content=content, _meta=meta or {}, _name=name)

    @classmethod
    def from_tool(
        cls, tool_result: str, origin: ToolCall, error: bool = False, meta: Optional[Dict[str, Any]] = None
    ) -> "ChatMessage":
        """
        Create a message from a Tool.

        :param tool_result: The result of the Tool invocation.
        :param origin: The Tool call that produced this result.
        :param error: Whether the Tool invocation resulted in an error.
        :param meta: Additional metadata associated with the message.
        :returns: A new ChatMessage instance.
        """
        return cls(
            _role=ChatRole.TOOL,
            _content=[ToolCallResult(result=tool_result, origin=origin, error=error)],
            _meta=meta or {},
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts ChatMessage into a dictionary.

        A warning about the changed key names is logged unless the call originates from a function named
        `component_to_dict` located in a file whose path contains `serialization.py`.

        :returns:
            Serialized version of the object, with keys `role`, `meta`, `name`, and `content`.
        :raises TypeError:
            If the message content holds a part that is not a `TextContent`, `ToolCall`, or `ToolCallResult`.
        """

        # We don't want to show the warning if ChatMessage.to_dict is used in pipeline serialization.
        # The raw frame chain is walked instead of calling `inspect.stack()`, which builds a FrameInfo
        # (including source-context lookup) for every frame on every call.
        used_in_pipeline_serialization = False
        frame = inspect.currentframe()
        while frame is not None:
            code = frame.f_code
            if code.co_name == "component_to_dict" and "serialization.py" in code.co_filename:
                used_in_pipeline_serialization = True
                break
            frame = frame.f_back
        # Drop the frame reference promptly so no frame object is kept alive longer than needed.
        del frame

        if not used_in_pipeline_serialization:
            logger.warning(
                "Starting from Haystack 2.12.0, ChatMessage.to_dict returns a dictionary with keys 'role', "
                "'meta', 'name', and 'content' instead of '_role', '_meta', '_name', and '_content'. "
                "If your code consumes this dictionary, please update it to use the new format."
            )

        serialized: Dict[str, Any] = {}
        serialized["role"] = self._role.value
        serialized["meta"] = self._meta
        serialized["name"] = self._name
        content: List[Dict[str, Any]] = []
        for part in self._content:
            if isinstance(part, TextContent):
                content.append({"text": part.text})
            elif isinstance(part, ToolCall):
                content.append({"tool_call": asdict(part)})
            elif isinstance(part, ToolCallResult):
                content.append({"tool_call_result": asdict(part)})
            else:
                raise TypeError(f"Unsupported type in ChatMessage content: `{type(part).__name__}` for `{part}`.")

        serialized["content"] = content
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ChatMessage":
        """
        Creates a new ChatMessage object from a dictionary.

        Three layouts are accepted: the current one (`content` is a list of dictionaries), the pre-2.9.0 one
        (`content` is a string), and the one used by versions >=2.9.0 and <2.12.0 (underscore-prefixed keys).
        A missing name falls back to `None` and missing metadata to an empty dictionary.

        :param data:
            The dictionary to build the ChatMessage object.
        :returns:
            The created object.
        :raises ValueError:
            If neither `content` nor `_content` is present, or if the role value is unknown.
        :raises TypeError:
            If `content` is neither a list nor a string.
        :raises KeyError:
            If the role key is missing.
        """
        if "content" in data:
            init_params: Dict[str, Any] = {
                "_role": ChatRole(data["role"]),
                # Name and metadata are optional; absent values must not abort deserialization.
                "_name": data.get("name"),
                "_meta": data.get("meta") or {},
            }

            if isinstance(data["content"], list):
                # current format - the serialized `content` field is a list of dictionaries
                init_params["_content"] = _deserialize_content(data["content"])
            elif isinstance(data["content"], str):
                # pre 2.9.0 format - the `content` field is a string
                init_params["_content"] = [TextContent(text=data["content"])]
            else:
                raise TypeError(f"Unsupported content type in serialized ChatMessage: `{(data['content'])}`")
            return cls(**init_params)

        if "_content" in data:
            # format for versions >=2.9.0 and <2.12.0 - the serialized `_content` field is a list of dictionaries
            return cls(
                _role=ChatRole(data["_role"]),
                _content=_deserialize_content(data["_content"]),
                _name=data.get("_name"),
                _meta=data.get("_meta") or {},
            )

        raise ValueError(f"Missing 'content' or '_content' in serialized ChatMessage: `{data}`")

    def to_openai_dict_format(self) -> Dict[str, Any]:
        """
        Convert a ChatMessage to the dictionary format expected by OpenAI's Chat API.

        :returns:
            A dictionary with `role` and, depending on the message, `name`, `content`, `tool_call_id`,
            and `tool_calls`.
        :raises ValueError:
            If the message has no content parts, has more than one text or Tool call result in total,
            or contains a Tool call (or Tool call result origin) whose `id` is `None`.
        """
        text_contents = self.texts
        tool_calls = self.tool_calls
        tool_call_results = self.tool_call_results

        if not text_contents and not tool_calls and not tool_call_results:
            raise ValueError(
                "A `ChatMessage` must contain at least one `TextContent`, `ToolCall`, or `ToolCallResult`."
            )
        if len(text_contents) + len(tool_call_results) > 1:
            raise ValueError("A `ChatMessage` can only contain one `TextContent` or one `ToolCallResult`.")

        openai_msg: Dict[str, Any] = {"role": self._role.value}

        # Add name field if present
        if self._name is not None:
            openai_msg["name"] = self._name

        if tool_call_results:
            result = tool_call_results[0]
            if result.origin.id is None:
                raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
            openai_msg["content"] = result.result
            openai_msg["tool_call_id"] = result.origin.id
            # OpenAI does not provide a way to communicate errors in tool invocations, so we ignore the error field
            return openai_msg

        if text_contents:
            openai_msg["content"] = text_contents[0]
        if tool_calls:
            openai_tool_calls = []
            for tc in tool_calls:
                if tc.id is None:
                    raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
                openai_tool_calls.append(
                    {
                        "id": tc.id,
                        "type": "function",
                        # We disable ensure_ascii so special chars like emojis are not converted
                        "function": {"name": tc.tool_name, "arguments": json.dumps(tc.arguments, ensure_ascii=False)},
                    }
                )
            openai_msg["tool_calls"] = openai_tool_calls
        return openai_msg

    @staticmethod
    def _validate_openai_message(message: Dict[str, Any]) -> None:
        """
        Validate that a message dictionary follows OpenAI's Chat API format.

        :param message: The message dictionary to validate
        :raises ValueError: If the message format is invalid
        """
        if "role" not in message:
            raise ValueError("The `role` field is required in the message dictionary.")

        role = message["role"]
        content = message.get("content")
        tool_calls = message.get("tool_calls")

        if role not in ["assistant", "user", "system", "developer", "tool"]:
            raise ValueError(f"Unsupported role: {role}")

        if role == "assistant":
            # Assistant messages may carry only tool calls, so `content` is optional for them.
            if not content and not tool_calls:
                raise ValueError("For assistant messages, either `content` or `tool_calls` must be present.")
            if tool_calls:
                for tc in tool_calls:
                    if "function" not in tc:
                        raise ValueError("Tool calls must contain the `function` field")
        elif not content:
            raise ValueError(f"The `content` field is required for {role} messages.")

    @classmethod
    def from_openai_dict_format(cls, message: Dict[str, Any]) -> "ChatMessage":
        """
        Create a ChatMessage from a dictionary in the format expected by OpenAI's Chat API.

        NOTE: While OpenAI's API requires `tool_call_id` in both tool calls and tool messages, this method
        accepts messages without it to support shallow OpenAI-compatible APIs.
        If you plan to use the resulting ChatMessage with OpenAI, you must include `tool_call_id` or you'll
        encounter validation errors.

        :param message:
            The OpenAI dictionary to build the ChatMessage object.
        :returns:
            The created ChatMessage object.

        :raises ValueError:
            If the message dictionary is missing required fields.
        """
        cls._validate_openai_message(message)

        role = message["role"]
        content = message.get("content")
        name = message.get("name")
        tool_calls = message.get("tool_calls")
        tool_call_id = message.get("tool_call_id")

        if role == "assistant":
            haystack_tool_calls = None
            if tool_calls:
                haystack_tool_calls = []
                for tc in tool_calls:
                    haystack_tc = ToolCall(
                        id=tc.get("id"),
                        tool_name=tc["function"]["name"],
                        # OpenAI transmits the arguments as a JSON-encoded string.
                        arguments=json.loads(tc["function"]["arguments"]),
                    )
                    haystack_tool_calls.append(haystack_tc)
            return cls.from_assistant(text=content, name=name, tool_calls=haystack_tool_calls)

        assert content is not None  # ensured by _validate_openai_message, but we need to make mypy happy

        if role == "user":
            return cls.from_user(text=content, name=name)
        # The `developer` role is mapped onto a system message.
        if role in ["system", "developer"]:
            return cls.from_system(text=content, name=name)

        # Remaining role is `tool`: the input carries no Tool name or arguments, so the origin
        # ToolCall is built with empty placeholders and only the call id.
        return cls.from_tool(
            tool_result=content, origin=ToolCall(id=tool_call_id, tool_name="", arguments={}), error=False
        )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc