• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 14645042953

24 Apr 2025 03:06PM UTC coverage: 90.447% (-0.04%) from 90.482%
14645042953

Pull #9303

github

web-flow
Merge fdc9cc510 into f97472329
Pull Request #9303: fix: make `HuggingFaceAPIChatGenerator` convert Tool Call `arguments` from string

10860 of 12007 relevant lines covered (90.45%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.55
haystack/dataclasses/chat_message.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import json
1✔
6
from dataclasses import asdict, dataclass, field
1✔
7
from enum import Enum
1✔
8
from typing import Any, Dict, List, Optional, Sequence, Union
1✔
9

10
from haystack import logging
1✔
11

12
logger = logging.getLogger(__name__)
1✔
13

14

15
LEGACY_INIT_PARAMETERS = {"role", "content", "meta", "name"}
1✔
16

17

18
class ChatRole(str, Enum):
    """
    The roles a participant can take within a chat.

    Members are `str` instances as well, so each one compares equal to its plain string value.
    """

    #: The user role. A message from the user contains only text.
    USER = "user"

    #: The system role. A message from the system contains only text.
    SYSTEM = "system"

    #: The assistant role. A message from the assistant can contain text and Tool calls. It can also store metadata.
    ASSISTANT = "assistant"

    #: The tool role. A message from a tool contains the result of a Tool invocation.
    TOOL = "tool"

    @staticmethod
    def from_str(string: str) -> "ChatRole":
        """
        Look up the ChatRole member whose value equals the given string.

        :param string: The role value to look up, for example `"user"`.
        :returns: The matching ChatRole member.
        :raises ValueError: If no member has the given value.
        """
        roles_by_value = {member.value: member for member in ChatRole}
        if string not in roles_by_value:
            raise ValueError(f"Unknown chat role '{string}'. Supported roles are: {list(roles_by_value)}")
        return roles_by_value[string]
46

47

48
@dataclass
class ToolCall:
    """
    A Tool invocation prepared by the model; it is usually carried by an assistant message.

    :param tool_name: Name of the Tool that should be called.
    :param arguments: Arguments the Tool should be called with.
        This is normally a dictionary. Some LLM APIs may hand back a JSON string instead; the code that
        builds the ToolCall is responsible for converting it (for example, HuggingFaceAPIChatGenerator
        converts string arguments to a dictionary before creating a ToolCall).
    :param id: Identifier of the Tool call. Defaults to `None`.
    """

    # Field order is part of the positional constructor signature; keep it stable.
    tool_name: str
    arguments: Dict[str, Any]
    id: Optional[str] = None  # noqa: A003
65

66

67
@dataclass
class ToolCallResult:
    """
    The outcome of invoking a Tool.

    :param result: What the Tool invocation returned, as a string.
    :param origin: The ToolCall this result belongs to.
    :param error: `True` if the Tool invocation ended in an error, `False` otherwise.
    """

    # All three fields are required; there are no defaults.
    result: str
    origin: ToolCall
    error: bool
80

81

82
@dataclass
class TextContent:
    """
    A piece of plain text carried by a chat message.

    :param text: The text itself.
    """

    text: str
91

92

93
ChatMessageContentT = Union[TextContent, ToolCall, ToolCallResult]
1✔
94

95

96
def _deserialize_content(serialized_content: List[Dict[str, Any]]) -> List[ChatMessageContentT]:
    """
    Rebuild the content parts of a ChatMessage from their serialized form.

    Each dictionary is dispatched on the first key it contains, checked in this order:
    `text`, `tool_call`, `tool_call_result`.

    :param serialized_content:
        The `content` field of a serialized ChatMessage (a list of dictionaries).

    :returns:
        The corresponding list of `ChatMessageContentT` objects, in the same order.

    :raises ValueError:
        If a dictionary contains none of the supported keys.
    """
    parts: List[ChatMessageContentT] = []

    for item in serialized_content:
        if "text" in item:
            parts.append(TextContent(text=item["text"]))
            continue

        if "tool_call" in item:
            parts.append(ToolCall(**item["tool_call"]))
            continue

        if "tool_call_result" in item:
            # The originating tool call is nested inside the result and rebuilt alongside it.
            parts.append(
                ToolCallResult(
                    result=item["tool_call_result"]["result"],
                    origin=ToolCall(**item["tool_call_result"]["origin"]),
                    error=item["tool_call_result"]["error"],
                )
            )
            continue

        raise ValueError(f"Unsupported part in serialized ChatMessage: `{item}`")

    return parts
123

124

125
@dataclass
class ChatMessage:
    """
    Represents a message in a LLM chat conversation.

    Use the `from_assistant`, `from_user`, `from_system`, and `from_tool` class methods to create a ChatMessage.
    """

    _role: ChatRole
    _content: Sequence[ChatMessageContentT]
    _name: Optional[str] = None
    _meta: Dict[str, Any] = field(default_factory=dict, hash=False)

    def __new__(cls, *args, **kwargs):
        """
        This method is reimplemented to make the changes to the `ChatMessage` dataclass more visible.

        :raises TypeError:
            If one of the removed legacy keyword parameters (`role`, `content`, `meta`, `name`) is passed, or
            if the second positional argument is neither a content part nor a sequence of content parts.
        """

        general_msg = (
            "Use the `from_assistant`, `from_user`, `from_system`, and `from_tool` class methods to create a "
            "ChatMessage. For more information about the new API and how to migrate, see the documentation:"
            " https://docs.haystack.deepset.ai/docs/chatmessage"
        )

        if any(param in kwargs for param in LEGACY_INIT_PARAMETERS):
            raise TypeError(
                "The `role`, `content`, `meta`, and `name` init parameters of `ChatMessage` have been removed. "
                f"{general_msg}"
            )

        allowed_content_types = (TextContent, ToolCall, ToolCallResult)
        if len(args) > 1:
            positional_content = args[1]
            # A single content part was accepted here before and still is.
            is_single_part = isinstance(positional_content, allowed_content_types)
            # `_content` is declared as a sequence of parts, so a sequence made only of allowed parts must
            # pass as well. Strings are sequences too and are excluded explicitly; otherwise an empty
            # string would slip through the `all(...)` check.
            is_sequence_of_parts = (
                isinstance(positional_content, Sequence)
                and not isinstance(positional_content, (str, bytes))
                and all(isinstance(part, allowed_content_types) for part in positional_content)
            )
            if not is_single_part and not is_sequence_of_parts:
                raise TypeError(
                    "The `_content` parameter of `ChatMessage` must be one of the following types: "
                    f"{', '.join(t.__name__ for t in allowed_content_types)}. "
                    f"{general_msg}"
                )

        return super(ChatMessage, cls).__new__(cls)

    def __getattribute__(self, name):
        """
        This method is reimplemented to make the `content` attribute removal more visible.

        :raises AttributeError: If the removed `content` attribute is accessed.
        """

        if name == "content":
            msg = (
                "The `content` attribute of `ChatMessage` has been removed. "
                "Use the `text` property to access the textual value. "
                "For more information about the new API and how to migrate, see the documentation: "
                "https://docs.haystack.deepset.ai/docs/chatmessage"
            )
            raise AttributeError(msg)
        return object.__getattribute__(self, name)

    def __len__(self):
        """
        Returns the number of content parts in the message.
        """
        return len(self._content)

    @property
    def role(self) -> ChatRole:
        """
        Returns the role of the entity sending the message.
        """
        return self._role

    @property
    def meta(self) -> Dict[str, Any]:
        """
        Returns the metadata associated with the message.
        """
        return self._meta

    @property
    def name(self) -> Optional[str]:
        """
        Returns the name associated with the message.
        """
        return self._name

    @property
    def texts(self) -> List[str]:
        """
        Returns the list of all texts contained in the message.
        """
        return [content.text for content in self._content if isinstance(content, TextContent)]

    @property
    def text(self) -> Optional[str]:
        """
        Returns the first text contained in the message, or `None` if there is none.
        """
        if texts := self.texts:
            return texts[0]
        return None

    @property
    def tool_calls(self) -> List[ToolCall]:
        """
        Returns the list of all Tool calls contained in the message.
        """
        return [content for content in self._content if isinstance(content, ToolCall)]

    @property
    def tool_call(self) -> Optional[ToolCall]:
        """
        Returns the first Tool call contained in the message, or `None` if there is none.
        """
        if tool_calls := self.tool_calls:
            return tool_calls[0]
        return None

    @property
    def tool_call_results(self) -> List[ToolCallResult]:
        """
        Returns the list of all Tool call results contained in the message.
        """
        return [content for content in self._content if isinstance(content, ToolCallResult)]

    @property
    def tool_call_result(self) -> Optional[ToolCallResult]:
        """
        Returns the first Tool call result contained in the message, or `None` if there is none.
        """
        if tool_call_results := self.tool_call_results:
            return tool_call_results[0]
        return None

    def is_from(self, role: Union[ChatRole, str]) -> bool:
        """
        Check if the message is from a specific role.

        :param role: The role to check against. Strings are converted with `ChatRole.from_str`.
        :returns: True if the message is from the specified role, False otherwise.
        """
        if isinstance(role, str):
            role = ChatRole.from_str(role)
        return self._role == role

    @classmethod
    def from_user(cls, text: str, meta: Optional[Dict[str, Any]] = None, name: Optional[str] = None) -> "ChatMessage":
        """
        Create a message from the user.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :returns: A new ChatMessage instance.
        """
        return cls(_role=ChatRole.USER, _content=[TextContent(text=text)], _meta=meta or {}, _name=name)

    @classmethod
    def from_system(cls, text: str, meta: Optional[Dict[str, Any]] = None, name: Optional[str] = None) -> "ChatMessage":
        """
        Create a message from the system.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :returns: A new ChatMessage instance.
        """
        return cls(_role=ChatRole.SYSTEM, _content=[TextContent(text=text)], _meta=meta or {}, _name=name)

    @classmethod
    def from_assistant(
        cls,
        text: Optional[str] = None,
        meta: Optional[Dict[str, Any]] = None,
        name: Optional[str] = None,
        tool_calls: Optional[List[ToolCall]] = None,
    ) -> "ChatMessage":
        """
        Create a message from the assistant.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param tool_calls: The Tool calls to include in the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :returns: A new ChatMessage instance.
        """
        content: List[ChatMessageContentT] = []
        # An empty string is still stored as text; only `None` means "no text part".
        if text is not None:
            content.append(TextContent(text=text))
        if tool_calls:
            content.extend(tool_calls)

        return cls(_role=ChatRole.ASSISTANT, _content=content, _meta=meta or {}, _name=name)

    @classmethod
    def from_tool(
        cls, tool_result: str, origin: ToolCall, error: bool = False, meta: Optional[Dict[str, Any]] = None
    ) -> "ChatMessage":
        """
        Create a message from a Tool.

        :param tool_result: The result of the Tool invocation.
        :param origin: The Tool call that produced this result.
        :param error: Whether the Tool invocation resulted in an error.
        :param meta: Additional metadata associated with the message.
        :returns: A new ChatMessage instance.
        """
        return cls(
            _role=ChatRole.TOOL,
            _content=[ToolCallResult(result=tool_result, origin=origin, error=error)],
            _meta=meta or {},
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts ChatMessage into a dictionary.

        :returns:
            Serialized version of the object, with the keys `role`, `meta`, `name` and `content`.
        :raises TypeError:
            If the message contains a content part of an unsupported type.
        """

        serialized: Dict[str, Any] = {}
        serialized["role"] = self._role.value
        serialized["meta"] = self._meta
        serialized["name"] = self._name
        content: List[Dict[str, Any]] = []
        for part in self._content:
            if isinstance(part, TextContent):
                content.append({"text": part.text})
            elif isinstance(part, ToolCall):
                content.append({"tool_call": asdict(part)})
            elif isinstance(part, ToolCallResult):
                content.append({"tool_call_result": asdict(part)})
            else:
                raise TypeError(f"Unsupported type in ChatMessage content: `{type(part).__name__}` for `{part}`.")

        serialized["content"] = content
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ChatMessage":
        """
        Creates a new ChatMessage object from a dictionary.

        Three serialization layouts are understood: the current one (`content` is a list of dictionaries),
        the pre 2.9.0 one (`content` is a string), and the one with underscore-prefixed keys (`_content`).

        :param data:
            The dictionary to build the ChatMessage object.
        :returns:
            The created object.
        :raises TypeError:
            If `content` is present but is neither a list nor a string.
        :raises ValueError:
            If neither `content` nor `_content` is present.
        """
        if "content" in data:
            init_params: Dict[str, Any] = {
                "_role": ChatRole(data["role"]),
                "_name": data.get("name"),
                "_meta": data.get("meta") or {},
            }

            if isinstance(data["content"], list):
                # current format - the serialized `content` field is a list of dictionaries
                init_params["_content"] = _deserialize_content(data["content"])
            elif isinstance(data["content"], str):
                # pre 2.9.0 format - the `content` field is a string
                init_params["_content"] = [TextContent(text=data["content"])]
            else:
                raise TypeError(f"Unsupported content type in serialized ChatMessage: `{(data['content'])}`")
            return cls(**init_params)

        if "_content" in data:
            # format for versions >=2.9.0 and <2.12.0 - the serialized `_content` field is a list of dictionaries
            return cls(
                _role=ChatRole(data["_role"]),
                _content=_deserialize_content(data["_content"]),
                _name=data.get("_name"),
                _meta=data.get("_meta") or {},
            )

        raise ValueError(f"Missing 'content' or '_content' in serialized ChatMessage: `{data}`")

    def to_openai_dict_format(self) -> Dict[str, Any]:
        """
        Convert a ChatMessage to the dictionary format expected by OpenAI's Chat API.

        :returns:
            The message as an OpenAI-style dictionary.
        :raises ValueError:
            If the message has no content, has more than one text / Tool call result part in total,
            or contains a Tool call (or a Tool call result origin) whose `id` is `None`.
        """
        text_contents = self.texts
        tool_calls = self.tool_calls
        tool_call_results = self.tool_call_results

        if not text_contents and not tool_calls and not tool_call_results:
            raise ValueError(
                "A `ChatMessage` must contain at least one `TextContent`, `ToolCall`, or `ToolCallResult`."
            )
        if len(text_contents) + len(tool_call_results) > 1:
            raise ValueError("A `ChatMessage` can only contain one `TextContent` or one `ToolCallResult`.")

        openai_msg: Dict[str, Any] = {"role": self._role.value}

        # Add name field if present
        if self._name is not None:
            openai_msg["name"] = self._name

        if tool_call_results:
            result = tool_call_results[0]
            if result.origin.id is None:
                raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
            openai_msg["content"] = result.result
            openai_msg["tool_call_id"] = result.origin.id
            # OpenAI does not provide a way to communicate errors in tool invocations, so we ignore the error field
            return openai_msg

        if text_contents:
            openai_msg["content"] = text_contents[0]
        if tool_calls:
            openai_tool_calls = []
            for tc in tool_calls:
                if tc.id is None:
                    raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
                openai_tool_calls.append(
                    {
                        "id": tc.id,
                        "type": "function",
                        # We disable ensure_ascii so special chars like emojis are not converted
                        "function": {"name": tc.tool_name, "arguments": json.dumps(tc.arguments, ensure_ascii=False)},
                    }
                )
            openai_msg["tool_calls"] = openai_tool_calls
        return openai_msg

    @staticmethod
    def _validate_openai_message(message: Dict[str, Any]) -> None:
        """
        Validate that a message dictionary follows OpenAI's Chat API format.

        :param message: The message dictionary to validate
        :raises ValueError: If the message format is invalid
        """
        if "role" not in message:
            raise ValueError("The `role` field is required in the message dictionary.")

        role = message["role"]
        content = message.get("content")
        tool_calls = message.get("tool_calls")

        if role not in ["assistant", "user", "system", "developer", "tool"]:
            raise ValueError(f"Unsupported role: {role}")

        if role == "assistant":
            # Assistant messages may carry tool calls without any text.
            if not content and not tool_calls:
                raise ValueError("For assistant messages, either `content` or `tool_calls` must be present.")
            if tool_calls:
                for tc in tool_calls:
                    if "function" not in tc:
                        raise ValueError("Tool calls must contain the `function` field")
        elif not content:
            raise ValueError(f"The `content` field is required for {role} messages.")

    @classmethod
    def from_openai_dict_format(cls, message: Dict[str, Any]) -> "ChatMessage":
        """
        Create a ChatMessage from a dictionary in the format expected by OpenAI's Chat API.

        NOTE: While OpenAI's API requires `tool_call_id` in both tool calls and tool messages, this method
        accepts messages without it to support shallow OpenAI-compatible APIs.
        If you plan to use the resulting ChatMessage with OpenAI, you must include `tool_call_id` or you'll
        encounter validation errors.

        The `arguments` of a tool call may be given either as a JSON string or as an already-parsed dictionary.

        :param message:
            The OpenAI dictionary to build the ChatMessage object.
        :returns:
            The created ChatMessage object.

        :raises ValueError:
            If the message dictionary is missing required fields.
        """
        cls._validate_openai_message(message)

        role = message["role"]
        content = message.get("content")
        name = message.get("name")
        tool_calls = message.get("tool_calls")
        tool_call_id = message.get("tool_call_id")

        if role == "assistant":
            haystack_tool_calls = None
            if tool_calls:
                haystack_tool_calls = []
                for tc in tool_calls:
                    raw_arguments = tc["function"]["arguments"]
                    # A dictionary is used as-is: passing it to json.loads would raise a TypeError.
                    # Anything else is parsed as JSON, exactly as before.
                    arguments = raw_arguments if isinstance(raw_arguments, dict) else json.loads(raw_arguments)
                    haystack_tc = ToolCall(
                        id=tc.get("id"),
                        tool_name=tc["function"]["name"],
                        arguments=arguments,
                    )
                    haystack_tool_calls.append(haystack_tc)
            return cls.from_assistant(text=content, name=name, tool_calls=haystack_tool_calls)

        assert content is not None  # ensured by _validate_openai_message, but we need to make mypy happy

        if role == "user":
            return cls.from_user(text=content, name=name)
        # The `developer` role is mapped onto a system message.
        if role in ["system", "developer"]:
            return cls.from_system(text=content, name=name)

        # Only the `tool` role is left. The message only carries the tool call id, so the origin
        # ToolCall is rebuilt with an empty name and no arguments.
        return cls.from_tool(
            tool_result=content, origin=ToolCall(id=tool_call_id, tool_name="", arguments={}), error=False
        )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc