• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 15018133817

14 May 2025 10:15AM UTC coverage: 90.467% (+0.05%) from 90.417%
15018133817

Pull #9342

github

web-flow
Merge 6c290fea7 into 42b378950
Pull Request #9342: Fix component tool parameters

10932 of 12084 relevant lines covered (90.47%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.7
haystack/components/generators/chat/hugging_face_api.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import json
1✔
6
from datetime import datetime
1✔
7
from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Union
1✔
8

9
from haystack import component, default_from_dict, default_to_dict, logging
1✔
10
from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall, select_streaming_callback
1✔
11
from haystack.dataclasses.streaming_chunk import StreamingCallbackT
1✔
12
from haystack.lazy_imports import LazyImport
1✔
13
from haystack.tools import (
1✔
14
    Tool,
15
    Toolset,
16
    _check_duplicate_tool_names,
17
    deserialize_tools_or_toolset_inplace,
18
    serialize_tools_or_toolset,
19
)
20
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
1✔
21
from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model, convert_message_to_hf_format
1✔
22
from haystack.utils.url_validation import is_valid_http_url
1✔
23

24
logger = logging.getLogger(__name__)
1✔
25

26
with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.27.0\"'") as huggingface_hub_import:
1✔
27
    from huggingface_hub import (
1✔
28
        AsyncInferenceClient,
29
        ChatCompletionInputFunctionDefinition,
30
        ChatCompletionInputTool,
31
        ChatCompletionOutput,
32
        ChatCompletionOutputToolCall,
33
        ChatCompletionStreamOutput,
34
        InferenceClient,
35
    )
36

37

38
def _convert_hfapi_tool_calls(hfapi_tool_calls: Optional[List["ChatCompletionOutputToolCall"]]) -> List[ToolCall]:
1✔
39
    """
40
    Convert HuggingFace API tool calls to a list of Haystack ToolCall.
41

42
    :param hfapi_tool_calls: The HuggingFace API tool calls to convert.
43
    :returns: A list of ToolCall objects.
44

45
    """
46
    if not hfapi_tool_calls:
1✔
47
        return []
1✔
48

49
    tool_calls = []
1✔
50

51
    for hfapi_tc in hfapi_tool_calls:
1✔
52
        hf_arguments = hfapi_tc.function.arguments
1✔
53

54
        arguments = None
1✔
55
        if isinstance(hf_arguments, dict):
1✔
56
            arguments = hf_arguments
1✔
57
        elif isinstance(hf_arguments, str):
1✔
58
            try:
1✔
59
                arguments = json.loads(hf_arguments)
1✔
60
            except json.JSONDecodeError:
1✔
61
                logger.warning(
1✔
62
                    "HuggingFace API returned a malformed JSON string for tool call arguments. This tool call "
63
                    "will be skipped. Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
64
                    _id=hfapi_tc.id,
65
                    _name=hfapi_tc.function.name,
66
                    _arguments=hf_arguments,
67
                )
68
        else:
69
            logger.warning(
1✔
70
                "HuggingFace API returned tool call arguments of type {_type}. Valid types are dict and str. This tool "
71
                "call will be skipped. Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
72
                _id=hfapi_tc.id,
73
                _name=hfapi_tc.function.name,
74
                _arguments=hf_arguments,
75
            )
76

77
        if arguments:
1✔
78
            tool_calls.append(ToolCall(tool_name=hfapi_tc.function.name, arguments=arguments, id=hfapi_tc.id))
1✔
79

80
    return tool_calls
1✔
81

82

83
def _convert_tools_to_hfapi_tools(
1✔
84
    tools: Optional[Union[List[Tool], Toolset]],
85
) -> Optional[List["ChatCompletionInputTool"]]:
86
    if not tools:
1✔
87
        return None
1✔
88

89
    # huggingface_hub<0.31.0 uses "arguments", huggingface_hub>=0.31.0 uses "parameters"
90
    parameters_name = "arguments" if hasattr(ChatCompletionInputFunctionDefinition, "arguments") else "parameters"
1✔
91

92
    hf_tools = []
1✔
93
    for tool in tools:
1✔
94
        hf_tools_args = {"name": tool.name, "description": tool.description, parameters_name: tool.parameters}
1✔
95

96
        hf_tools.append(
1✔
97
            ChatCompletionInputTool(function=ChatCompletionInputFunctionDefinition(**hf_tools_args), type="function")
98
        )
99

100
    return hf_tools
1✔
101

102

103
@component
class HuggingFaceAPIChatGenerator:
    """
    Completes chats using Hugging Face APIs.

    HuggingFaceAPIChatGenerator uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format for input and output. Use it to generate text with Hugging Face APIs:
    - [Free Serverless Inference API](https://huggingface.co/inference-api)
    - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
    - [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference)

    ### Usage examples

    #### With the free serverless inference API

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.utils import Secret
    from haystack.utils.hf import HFGenerationAPIType

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    # the api_type can be expressed using the HFGenerationAPIType enum or as a string
    api_type = HFGenerationAPIType.SERVERLESS_INFERENCE_API
    api_type = "serverless_inference_api" # this is equivalent to the above

    generator = HuggingFaceAPIChatGenerator(api_type=api_type,
                                            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
                                            token=Secret.from_token("<your-api-key>"))

    result = generator.run(messages)
    print(result)
    ```

    #### With paid inference endpoints

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.utils import Secret

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    generator = HuggingFaceAPIChatGenerator(api_type="inference_endpoints",
                                            api_params={"url": "<your-inference-endpoint-url>"},
                                            token=Secret.from_token("<your-api-key>"))

    result = generator.run(messages)
    print(result)
    ```

    #### With self-hosted text generation inference

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    generator = HuggingFaceAPIChatGenerator(api_type="text_generation_inference",
                                            api_params={"url": "http://localhost:8080"})

    result = generator.run(messages)
    print(result)
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        api_type: Union[HFGenerationAPIType, str],
        api_params: Dict[str, str],
        token: Optional[Secret] = Secret.from_env_var(["HF_API_TOKEN", "HF_TOKEN"], strict=False),
        generation_kwargs: Optional[Dict[str, Any]] = None,
        stop_words: Optional[List[str]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
    ):
        """
        Initialize the HuggingFaceAPIChatGenerator instance.

        :param api_type:
            The type of Hugging Face API to use. Available types:
            - `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference).
            - `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints).
            - `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api).
        :param api_params:
            A dictionary with the following keys:
            - `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
            - `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
            `TEXT_GENERATION_INFERENCE`.
        :param token:
            The Hugging Face token to use as HTTP bearer authorization.
            Check your HF token in your [account settings](https://huggingface.co/settings/tokens).
        :param generation_kwargs:
            A dictionary with keyword arguments to customize text generation.
                Some examples: `max_tokens`, `temperature`, `top_p`.
                For details, see [Hugging Face chat_completion documentation](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
        :param stop_words:
            An optional list of strings representing the stop words.
        :param streaming_callback:
            An optional callable for handling streaming responses.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls.
            The chosen model should support tool/function calling, according to the model card.
            Support for tools in the Hugging Face API and TGI is not yet fully refined and you may experience
            unexpected behavior. This parameter can accept either a list of `Tool` objects or a `Toolset` instance.
        """

        # fail fast if huggingface_hub is not installed (lazy import declared at module level)
        huggingface_hub_import.check()

        # accept the enum or its string form; normalize to the enum
        if isinstance(api_type, str):
            api_type = HFGenerationAPIType.from_str(api_type)

        if api_type == HFGenerationAPIType.SERVERLESS_INFERENCE_API:
            # serverless API addresses a model by its Hub ID
            model = api_params.get("model")
            if model is None:
                raise ValueError(
                    "To use the Serverless Inference API, you need to specify the `model` parameter in `api_params`."
                )
            # validates the model for generation use — presumably contacts the HF Hub; confirm in haystack.utils.hf
            check_valid_model(model, HFModelType.GENERATION, token)
            model_or_url = model
        elif api_type in [HFGenerationAPIType.INFERENCE_ENDPOINTS, HFGenerationAPIType.TEXT_GENERATION_INFERENCE]:
            # endpoint-style APIs are addressed by URL instead of model ID
            url = api_params.get("url")
            if url is None:
                msg = (
                    "To use Text Generation Inference or Inference Endpoints, you need to specify the `url` parameter "
                    "in `api_params`."
                )
                raise ValueError(msg)
            if not is_valid_http_url(url):
                raise ValueError(f"Invalid URL: {url}")
            model_or_url = url
        else:
            msg = f"Unknown api_type {api_type}"
            raise ValueError(msg)

        # tools and streaming are mutually exclusive for this component
        if tools and streaming_callback is not None:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(tools or []))

        # handle generation kwargs setup
        # copy so the caller's dict is never mutated; merge stop_words into the
        # "stop" list and default the generated length to 512 tokens
        generation_kwargs = generation_kwargs.copy() if generation_kwargs else {}
        generation_kwargs["stop"] = generation_kwargs.get("stop", [])
        generation_kwargs["stop"].extend(stop_words or [])
        generation_kwargs.setdefault("max_tokens", 512)

        self.api_type = api_type
        self.api_params = api_params
        self.token = token
        self.generation_kwargs = generation_kwargs
        self.streaming_callback = streaming_callback
        # one sync and one async client, both pointing at the same model/endpoint
        self._client = InferenceClient(model_or_url, token=token.resolve_value() if token else None)
        self._async_client = AsyncInferenceClient(model_or_url, token=token.resolve_value() if token else None)
        self.tools = tools
260

261
    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            A dictionary containing the serialized component.
        """
        # the callback is stored as its import path; the Secret and tools have
        # their own serialized representations
        if self.streaming_callback:
            callback_name = serialize_callable(self.streaming_callback)
        else:
            callback_name = None
        serialized_token = self.token.to_dict() if self.token else None
        return default_to_dict(
            self,
            api_type=str(self.api_type),
            api_params=self.api_params,
            token=serialized_token,
            generation_kwargs=self.generation_kwargs,
            streaming_callback=callback_name,
            tools=serialize_tools_or_toolset(self.tools),
        )
278

279
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceAPIChatGenerator":
        """
        Deserialize this component from a dictionary.
        """
        init_params = data["init_parameters"]
        # restore the Secret token and the tools/toolset from their serialized forms
        deserialize_secrets_inplace(init_params, keys=["token"])
        deserialize_tools_or_toolset_inplace(init_params, key="tools")
        # the streaming callback is stored as a fully qualified import path
        callback_path = init_params.get("streaming_callback")
        if callback_path:
            init_params["streaming_callback"] = deserialize_callable(callback_path)
        return default_from_dict(cls, data)
291

292
    @component.output_types(replies=List[ChatMessage])
    def run(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
    ):
        """
        Invoke the text generation inference based on the provided messages and generation parameters.

        :param messages:
            A list of ChatMessage objects representing the input messages.
        :param generation_kwargs:
            Additional keyword arguments for text generation.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls. If set, it will override
            the `tools` parameter set during component initialization. This parameter can accept either a
            list of `Tool` objects or a `Toolset` instance.
        :param streaming_callback:
            An optional callable for handling streaming responses. If set, it will override the `streaming_callback`
            parameter set during component initialization.
        :returns: A dictionary with the following keys:
            - `replies`: A list containing the generated responses as ChatMessage objects.
        """
        # runtime kwargs take precedence over the defaults configured at init time
        merged_kwargs = dict(self.generation_kwargs)
        merged_kwargs.update(generation_kwargs or {})

        hf_messages = [convert_message_to_hf_format(m) for m in messages]

        # runtime tools override the ones configured at init time
        active_tools = tools or self.tools
        if active_tools and self.streaming_callback:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(active_tools or []))

        # validate and select the streaming callback (runtime one wins)
        callback = select_streaming_callback(self.streaming_callback, streaming_callback, requires_async=False)
        if callback:
            return self._run_streaming(hf_messages, merged_kwargs, callback)

        # a Toolset is materialized into a plain list before conversion
        if active_tools and isinstance(active_tools, Toolset):
            active_tools = list(active_tools)

        hf_tools = _convert_tools_to_hfapi_tools(active_tools)

        return self._run_non_streaming(hf_messages, merged_kwargs, hf_tools)
342

343
    @component.output_types(replies=List[ChatMessage])
    async def run_async(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
    ):
        """
        Asynchronously invokes the text generation inference based on the provided messages and generation parameters.

        This is the asynchronous version of the `run` method. It has the same parameters
        and return values but can be used with `await` in an async code.

        :param messages:
            A list of ChatMessage objects representing the input messages.
        :param generation_kwargs:
            Additional keyword arguments for text generation.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls. If set, it will override the `tools`
            parameter set during component initialization. This parameter can accept either a list of `Tool` objects
            or a `Toolset` instance.
        :param streaming_callback:
            An optional callable for handling streaming responses. If set, it will override the `streaming_callback`
            parameter set during component initialization.
        :returns: A dictionary with the following keys:
            - `replies`: A list containing the generated responses as ChatMessage objects.
        """
        # runtime kwargs take precedence over the defaults configured at init time
        merged_kwargs = dict(self.generation_kwargs)
        merged_kwargs.update(generation_kwargs or {})

        hf_messages = [convert_message_to_hf_format(m) for m in messages]

        # runtime tools override the ones configured at init time
        active_tools = tools or self.tools
        if active_tools and self.streaming_callback:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(active_tools or []))

        # validate and select the streaming callback (must be awaitable here)
        callback = select_streaming_callback(self.streaming_callback, streaming_callback, requires_async=True)
        if callback:
            return await self._run_streaming_async(hf_messages, merged_kwargs, callback)

        # a Toolset is materialized into a plain list before conversion
        if active_tools and isinstance(active_tools, Toolset):
            active_tools = list(active_tools)

        hf_tools = _convert_tools_to_hfapi_tools(active_tools)

        return await self._run_non_streaming_async(hf_messages, merged_kwargs, hf_tools)
394

395
    def _run_streaming(
        self, messages: List[Dict[str, str]], generation_kwargs: Dict[str, Any], streaming_callback: StreamingCallbackT
    ):
        """
        Stream a chat completion, invoking `streaming_callback` once per received chunk.

        :param messages: Messages already converted to the Hugging Face wire format.
        :param generation_kwargs: Fully merged generation parameters for `chat_completion`.
        :param streaming_callback: Synchronous callable invoked with each StreamingChunk.
        :returns: A dictionary with a `replies` key holding one assistant ChatMessage whose
            text is the concatenation of all streamed deltas.
        """
        api_output: Iterable[ChatCompletionStreamOutput] = self._client.chat_completion(
            messages, stream=True, **generation_kwargs
        )

        generated_text = ""
        first_chunk_time = None
        meta: Dict[str, Any] = {}

        for chunk in api_output:
            # n is unused, so the API always returns only one choice
            # the argument is probably allowed for compatibility with OpenAI
            # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
            choice = chunk.choices[0]

            text = choice.delta.content or ""
            generated_text += text

            finish_reason = choice.finish_reason
            if finish_reason:
                meta["finish_reason"] = finish_reason

            # timestamp of the first received chunk, later reported as completion_start_time
            if first_chunk_time is None:
                first_chunk_time = datetime.now().isoformat()

            # NOTE(review): every StreamingChunk shares this same `meta` dict, so writes made
            # after a chunk is delivered (including the update below) are visible to chunks
            # the callback already received — confirm this aliasing is intended.
            stream_chunk = StreamingChunk(text, meta)
            streaming_callback(stream_chunk)

        meta.update(
            {
                "model": self._client.model,
                "index": 0,
                "usage": {"prompt_tokens": 0, "completion_tokens": 0},  # not available in streaming
                "completion_start_time": first_chunk_time,
            }
        )

        message = ChatMessage.from_assistant(text=generated_text, meta=meta)
        return {"replies": [message]}
436

437
    def _run_non_streaming(
        self,
        messages: List[Dict[str, str]],
        generation_kwargs: Dict[str, Any],
        tools: Optional[List["ChatCompletionInputTool"]] = None,
    ) -> Dict[str, List[ChatMessage]]:
        """
        Issue a single (non-streaming) chat-completion request and wrap the reply.

        :param messages: Messages already converted to the Hugging Face wire format.
        :param generation_kwargs: Fully merged generation parameters for `chat_completion`.
        :param tools: Tools already converted to the Hugging Face API format, if any.
        :returns: A dictionary with a `replies` key holding the assistant ChatMessage
            (text, tool calls, and metadata), or an empty list if no choice was returned.
        """
        response: ChatCompletionOutput = self._client.chat_completion(
            messages=messages, tools=tools, **generation_kwargs
        )

        if len(response.choices) == 0:
            return {"replies": []}

        # only one choice is ever produced: `n` is unused by the API (kept for
        # OpenAI compatibility), so the first choice is the whole answer
        first_choice = response.choices[0]

        reply_text = first_choice.message.content
        reply_tool_calls = _convert_hfapi_tool_calls(first_choice.message.tool_calls)

        # token counts default to zero when the API reports no usage
        usage_stats = {"prompt_tokens": 0, "completion_tokens": 0}
        if response.usage:
            usage_stats = {
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens,
            }

        meta: Dict[str, Any] = {
            "model": self._client.model,
            "finish_reason": first_choice.finish_reason,
            "index": first_choice.index,
        }
        meta["usage"] = usage_stats

        reply = ChatMessage.from_assistant(text=reply_text, tool_calls=reply_tool_calls, meta=meta)
        return {"replies": [reply]}
475

476
    async def _run_streaming_async(
        self, messages: List[Dict[str, str]], generation_kwargs: Dict[str, Any], streaming_callback: StreamingCallbackT
    ):
        """
        Asynchronously stream a chat completion, awaiting `streaming_callback` once per chunk.

        :param messages: Messages already converted to the Hugging Face wire format.
        :param generation_kwargs: Fully merged generation parameters for `chat_completion`.
        :param streaming_callback: Awaitable callable invoked with each StreamingChunk.
        :returns: A dictionary with a `replies` key holding one assistant ChatMessage whose
            text is the concatenation of all streamed deltas.
        """
        api_output: AsyncIterable[ChatCompletionStreamOutput] = await self._async_client.chat_completion(
            messages, stream=True, **generation_kwargs
        )

        generated_text = ""
        first_chunk_time = None
        meta: Dict[str, Any] = {}

        async for chunk in api_output:
            # only one choice is returned (`n` is unused by the API, kept for OpenAI compatibility)
            choice = chunk.choices[0]

            text = choice.delta.content or ""
            generated_text += text

            finish_reason = choice.finish_reason
            if finish_reason:
                meta["finish_reason"] = finish_reason

            # timestamp of the first received chunk, later reported as completion_start_time
            if first_chunk_time is None:
                first_chunk_time = datetime.now().isoformat()

            # NOTE(review): every StreamingChunk shares this same `meta` dict, so writes made
            # after a chunk is delivered (including the update below) are visible to chunks
            # the callback already received — confirm this aliasing is intended.
            stream_chunk = StreamingChunk(text, meta)
            await streaming_callback(stream_chunk)  # type: ignore

        meta.update(
            {
                "model": self._async_client.model,
                "index": 0,
                # token usage is not available in streaming mode
                "usage": {"prompt_tokens": 0, "completion_tokens": 0},
                "completion_start_time": first_chunk_time,
            }
        )

        message = ChatMessage.from_assistant(text=generated_text, meta=meta)
        return {"replies": [message]}
514

515
    async def _run_non_streaming_async(
        self,
        messages: List[Dict[str, str]],
        generation_kwargs: Dict[str, Any],
        tools: Optional[List["ChatCompletionInputTool"]] = None,
    ) -> Dict[str, List[ChatMessage]]:
        """
        Await a single (non-streaming) chat-completion request and wrap the reply.

        :param messages: Messages already converted to the Hugging Face wire format.
        :param generation_kwargs: Fully merged generation parameters for `chat_completion`.
        :param tools: Tools already converted to the Hugging Face API format, if any.
        :returns: A dictionary with a `replies` key holding the assistant ChatMessage
            (text, tool calls, and metadata), or an empty list if no choice was returned.
        """
        response: ChatCompletionOutput = await self._async_client.chat_completion(
            messages=messages, tools=tools, **generation_kwargs
        )

        if len(response.choices) == 0:
            return {"replies": []}

        # only one choice is ever produced (`n` is unused by the API), so the
        # first choice is the whole answer
        first_choice = response.choices[0]

        reply_text = first_choice.message.content
        reply_tool_calls = _convert_hfapi_tool_calls(first_choice.message.tool_calls)

        # token counts default to zero when the API reports no usage
        usage_stats = {"prompt_tokens": 0, "completion_tokens": 0}
        if response.usage:
            usage_stats = {
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens,
            }

        meta: Dict[str, Any] = {
            "model": self._async_client.model,
            "finish_reason": first_choice.finish_reason,
            "index": first_choice.index,
        }
        meta["usage"] = usage_stats

        reply = ChatMessage.from_assistant(text=reply_text, tool_calls=reply_tool_calls, meta=meta)
        return {"replies": [reply]}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc