haystack/components/generators/chat/hugging_face_api.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json
from datetime import datetime
from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Union

from haystack import component, default_from_dict, default_to_dict, logging
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
from haystack.dataclasses import ChatMessage, ComponentInfo, StreamingChunk, ToolCall, select_streaming_callback
from haystack.dataclasses.streaming_chunk import StreamingCallbackT
from haystack.lazy_imports import LazyImport
from haystack.tools import (
    Tool,
    Toolset,
    _check_duplicate_tool_names,
    deserialize_tools_or_toolset_inplace,
    serialize_tools_or_toolset,
)
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model, convert_message_to_hf_format
from haystack.utils.url_validation import is_valid_http_url

logger = logging.getLogger(__name__)

with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.27.0\"'") as huggingface_hub_import:
    from huggingface_hub import (
        AsyncInferenceClient,
        ChatCompletionInputFunctionDefinition,
        ChatCompletionInputStreamOptions,
        ChatCompletionInputTool,
        ChatCompletionOutput,
        ChatCompletionOutputToolCall,
        ChatCompletionStreamOutput,
        InferenceClient,
    )


def _convert_hfapi_tool_calls(hfapi_tool_calls: Optional[List["ChatCompletionOutputToolCall"]]) -> List[ToolCall]:
    """
    Convert HuggingFace API tool calls to a list of Haystack ToolCall objects.

    :param hfapi_tool_calls: The HuggingFace API tool calls to convert.
    :returns: A list of ToolCall objects.
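
    Example (an illustrative sketch: the duck-typed stand-in below mimics the
    `ChatCompletionOutputToolCall` objects that normally come from the API):

    ```python
    from types import SimpleNamespace

    hf_call = SimpleNamespace(
        id="call_1",
        function=SimpleNamespace(name="get_weather", arguments='{"city": "Berlin"}'),
    )
    _convert_hfapi_tool_calls([hf_call])
    # [ToolCall(tool_name='get_weather', arguments={'city': 'Berlin'}, id='call_1')]
    ```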
    """
    if not hfapi_tool_calls:
        return []

    tool_calls = []

    for hfapi_tc in hfapi_tool_calls:
        hf_arguments = hfapi_tc.function.arguments

        arguments = None
        if isinstance(hf_arguments, dict):
            arguments = hf_arguments
        elif isinstance(hf_arguments, str):
            try:
                arguments = json.loads(hf_arguments)
            except json.JSONDecodeError:
                logger.warning(
                    "HuggingFace API returned a malformed JSON string for tool call arguments. This tool call "
                    "will be skipped. Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                    _id=hfapi_tc.id,
                    _name=hfapi_tc.function.name,
                    _arguments=hf_arguments,
                )
        else:
            logger.warning(
                "HuggingFace API returned tool call arguments of type {_type}. Valid types are dict and str. This tool "
                "call will be skipped. Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _type=type(hf_arguments).__name__,
                _id=hfapi_tc.id,
                _name=hfapi_tc.function.name,
                _arguments=hf_arguments,
            )

        if arguments:
            tool_calls.append(ToolCall(tool_name=hfapi_tc.function.name, arguments=arguments, id=hfapi_tc.id))

    return tool_calls


def _convert_tools_to_hfapi_tools(
    tools: Optional[Union[List[Tool], Toolset]],
) -> Optional[List["ChatCompletionInputTool"]]:
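    """
    Convert Haystack tools to the Hugging Face API tool format.

    :param tools: The Haystack Tool objects or Toolset to convert, if any.
    :returns: A list of ChatCompletionInputTool objects, or None if no tools are provided.
    """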
    if not tools:
        return None

    # huggingface_hub<0.31.0 uses "arguments", huggingface_hub>=0.31.0 uses "parameters"
    parameters_name = "arguments" if hasattr(ChatCompletionInputFunctionDefinition, "arguments") else "parameters"

    hf_tools = []
    for tool in tools:
        hf_tools_args = {"name": tool.name, "description": tool.description, parameters_name: tool.parameters}

        hf_tools.append(
            ChatCompletionInputTool(function=ChatCompletionInputFunctionDefinition(**hf_tools_args), type="function")
        )

    return hf_tools


def _convert_chat_completion_stream_output_to_streaming_chunk(
    chunk: "ChatCompletionStreamOutput", component_info: Optional[ComponentInfo] = None
) -> StreamingChunk:
    """
    Converts the Hugging Face API ChatCompletionStreamOutput to a StreamingChunk.
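
    Example (an illustrative sketch: the duck-typed stand-in below mimics a
    `ChatCompletionStreamOutput` chunk from the streaming API):

    ```python
    from types import SimpleNamespace

    chunk = SimpleNamespace(
        model="HuggingFaceH4/zephyr-7b-beta",
        usage=None,
        choices=[SimpleNamespace(delta=SimpleNamespace(content="Hello"), finish_reason=None)],
    )
    _convert_chat_completion_stream_output_to_streaming_chunk(chunk).content
    # 'Hello'
    ```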
    """
    # Choices is empty if include_usage is set to True, in which case only usage information is returned.
    if len(chunk.choices) == 0:
        usage = None
        if chunk.usage:
            usage = {"prompt_tokens": chunk.usage.prompt_tokens, "completion_tokens": chunk.usage.completion_tokens}
        return StreamingChunk(
            content="",
            meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "usage": usage},
            component_info=component_info,
        )

    # the `n` argument is unused, so the API always returns only one choice
    # the argument is probably allowed for compatibility with OpenAI
    # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
    choice = chunk.choices[0]
    stream_chunk = StreamingChunk(
        content=choice.delta.content or "",
        meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "finish_reason": choice.finish_reason},
        component_info=component_info,
    )
    return stream_chunk


@component
class HuggingFaceAPIChatGenerator:
    """
    Completes chats using Hugging Face APIs.

    HuggingFaceAPIChatGenerator uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format for input and output. Use it to generate text with Hugging Face APIs:
    - [Free Serverless Inference API](https://huggingface.co/inference-api)
    - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
    - [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference)

    ### Usage examples

    #### With the free serverless inference API

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.utils import Secret
    from haystack.utils.hf import HFGenerationAPIType

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    # the api_type can be expressed using the HFGenerationAPIType enum or as a string
    api_type = HFGenerationAPIType.SERVERLESS_INFERENCE_API
    api_type = "serverless_inference_api"  # this is equivalent to the above

    generator = HuggingFaceAPIChatGenerator(api_type=api_type,
                                            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
                                            token=Secret.from_token("<your-api-key>"))

    result = generator.run(messages)
    print(result)
    ```

    #### With paid inference endpoints

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.utils import Secret

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    generator = HuggingFaceAPIChatGenerator(api_type="inference_endpoints",
                                            api_params={"url": "<your-inference-endpoint-url>"},
                                            token=Secret.from_token("<your-api-key>"))

    result = generator.run(messages)
    print(result)
    ```

    #### With self-hosted text generation inference

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    generator = HuggingFaceAPIChatGenerator(api_type="text_generation_inference",
                                            api_params={"url": "http://localhost:8080"})

    result = generator.run(messages)
    print(result)
    ```
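
    #### With tools

    A minimal sketch of tool calling (the `get_weather` tool below is illustrative, and the
    chosen model must support tool/function calling):

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.tools import Tool
    from haystack.utils import Secret

    def get_weather(city: str) -> str:
        return f"Sunny in {city}"

    weather_tool = Tool(
        name="get_weather",
        description="Get the current weather for a city.",
        parameters={"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]},
        function=get_weather,
    )

    generator = HuggingFaceAPIChatGenerator(api_type="serverless_inference_api",
                                            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
                                            token=Secret.from_token("<your-api-key>"),
                                            tools=[weather_tool])

    result = generator.run([ChatMessage.from_user("What's the weather in Berlin?")])
    print(result["replies"][0].tool_calls)
    ```

    #### With streaming

    A minimal sketch of streaming with the ready-made `print_streaming_chunk` callback
    (note that tools and streaming cannot be combined):

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.components.generators.utils import print_streaming_chunk
    from haystack.dataclasses import ChatMessage

    generator = HuggingFaceAPIChatGenerator(api_type="serverless_inference_api",
                                            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
                                            streaming_callback=print_streaming_chunk)

    result = generator.run([ChatMessage.from_user("What's Natural Language Processing?")])
    ```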
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        api_type: Union[HFGenerationAPIType, str],
        api_params: Dict[str, str],
        token: Optional[Secret] = Secret.from_env_var(["HF_API_TOKEN", "HF_TOKEN"], strict=False),
        generation_kwargs: Optional[Dict[str, Any]] = None,
        stop_words: Optional[List[str]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
    ):
        """
        Initialize the HuggingFaceAPIChatGenerator instance.

        :param api_type:
            The type of Hugging Face API to use. Available types:
            - `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference).
            - `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints).
            - `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api).
        :param api_params:
            A dictionary with the following keys:
            - `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
            - `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
            `TEXT_GENERATION_INFERENCE`.
        :param token:
            The Hugging Face token to use as HTTP bearer authorization.
            Check your HF token in your [account settings](https://huggingface.co/settings/tokens).
        :param generation_kwargs:
            A dictionary with keyword arguments to customize text generation.
            Some examples: `max_tokens`, `temperature`, `top_p`.
            For details, see [Hugging Face chat_completion documentation](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
        :param stop_words:
            An optional list of strings representing the stop words.
        :param streaming_callback:
            An optional callable for handling streaming responses.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls.
            The chosen model should support tool/function calling, according to the model card.
            Support for tools in the Hugging Face API and TGI is not yet fully refined, and you may experience
            unexpected behavior. This parameter can accept either a list of `Tool` objects or a `Toolset` instance.
        """

        huggingface_hub_import.check()

        if isinstance(api_type, str):
            api_type = HFGenerationAPIType.from_str(api_type)

        if api_type == HFGenerationAPIType.SERVERLESS_INFERENCE_API:
            model = api_params.get("model")
            if model is None:
                raise ValueError(
                    "To use the Serverless Inference API, you need to specify the `model` parameter in `api_params`."
                )
            check_valid_model(model, HFModelType.GENERATION, token)
            model_or_url = model
        elif api_type in [HFGenerationAPIType.INFERENCE_ENDPOINTS, HFGenerationAPIType.TEXT_GENERATION_INFERENCE]:
            url = api_params.get("url")
            if url is None:
                msg = (
                    "To use Text Generation Inference or Inference Endpoints, you need to specify the `url` parameter "
                    "in `api_params`."
                )
                raise ValueError(msg)
            if not is_valid_http_url(url):
                raise ValueError(f"Invalid URL: {url}")
            model_or_url = url
        else:
            msg = f"Unknown api_type {api_type}"
            raise ValueError(msg)

        if tools and streaming_callback is not None:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(tools or []))

        # handle generation kwargs setup
        generation_kwargs = generation_kwargs.copy() if generation_kwargs else {}
        generation_kwargs["stop"] = generation_kwargs.get("stop", [])
        generation_kwargs["stop"].extend(stop_words or [])
        generation_kwargs.setdefault("max_tokens", 512)

        self.api_type = api_type
        self.api_params = api_params
        self.token = token
        self.generation_kwargs = generation_kwargs
        self.streaming_callback = streaming_callback
        self._client = InferenceClient(model_or_url, token=token.resolve_value() if token else None)
        self._async_client = AsyncInferenceClient(model_or_url, token=token.resolve_value() if token else None)
        self.tools = tools

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            A dictionary containing the serialized component.
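
        Example (a minimal sketch; the local TGI URL is illustrative, and no connection
        is made while serializing or deserializing):

        ```python
        generator = HuggingFaceAPIChatGenerator(api_type="text_generation_inference",
                                                api_params={"url": "http://localhost:8080"})
        data = generator.to_dict()
        restored = HuggingFaceAPIChatGenerator.from_dict(data)
        ```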
        """
        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
        return default_to_dict(
            self,
            api_type=str(self.api_type),
            api_params=self.api_params,
            token=self.token.to_dict() if self.token else None,
            generation_kwargs=self.generation_kwargs,
            streaming_callback=callback_name,
            tools=serialize_tools_or_toolset(self.tools),
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceAPIChatGenerator":
        """
        Deserialize this component from a dictionary.
        """
        deserialize_secrets_inplace(data["init_parameters"], keys=["token"])
        deserialize_tools_or_toolset_inplace(data["init_parameters"], key="tools")
        init_params = data.get("init_parameters", {})
        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)
        return default_from_dict(cls, data)

    @component.output_types(replies=List[ChatMessage])
    def run(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
    ):
        """
        Invoke the text generation inference based on the provided messages and generation parameters.

        :param messages:
            A list of ChatMessage objects representing the input messages.
        :param generation_kwargs:
            Additional keyword arguments for text generation.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls. If set, it will override
            the `tools` parameter set during component initialization. This parameter can accept either a
            list of `Tool` objects or a `Toolset` instance.
        :param streaming_callback:
            An optional callable for handling streaming responses. If set, it will override the `streaming_callback`
            parameter set during component initialization.
        :returns: A dictionary with the following keys:
            - `replies`: A list containing the generated responses as ChatMessage objects.
        """

        # update generation kwargs by merging with the default ones
        generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}

        formatted_messages = [convert_message_to_hf_format(message) for message in messages]

        tools = tools or self.tools
        if tools and self.streaming_callback:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(tools or []))

        # validate and select the streaming callback
        streaming_callback = select_streaming_callback(
            self.streaming_callback, streaming_callback, requires_async=False
        )

        if streaming_callback:
            return self._run_streaming(formatted_messages, generation_kwargs, streaming_callback)

        if tools and isinstance(tools, Toolset):
            tools = list(tools)

        hf_tools = _convert_tools_to_hfapi_tools(tools)

        return self._run_non_streaming(formatted_messages, generation_kwargs, hf_tools)

    @component.output_types(replies=List[ChatMessage])
    async def run_async(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
    ):
        """
        Asynchronously invokes the text generation inference based on the provided messages and generation parameters.

        This is the asynchronous version of the `run` method. It has the same parameters
        and return values but can be used with `await` in async code.

        :param messages:
            A list of ChatMessage objects representing the input messages.
        :param generation_kwargs:
            Additional keyword arguments for text generation.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls. If set, it will override the `tools`
            parameter set during component initialization. This parameter can accept either a list of `Tool` objects
            or a `Toolset` instance.
        :param streaming_callback:
            An optional callable for handling streaming responses. If set, it will override the `streaming_callback`
            parameter set during component initialization.
        :returns: A dictionary with the following keys:
            - `replies`: A list containing the generated responses as ChatMessage objects.
        """

        # update generation kwargs by merging with the default ones
        generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}

        formatted_messages = [convert_message_to_hf_format(message) for message in messages]

        tools = tools or self.tools
        if tools and self.streaming_callback:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(tools or []))

        # validate and select the streaming callback
        streaming_callback = select_streaming_callback(self.streaming_callback, streaming_callback, requires_async=True)

        if streaming_callback:
            return await self._run_streaming_async(formatted_messages, generation_kwargs, streaming_callback)

        if tools and isinstance(tools, Toolset):
            tools = list(tools)

        hf_tools = _convert_tools_to_hfapi_tools(tools)

        return await self._run_non_streaming_async(formatted_messages, generation_kwargs, hf_tools)

    def _run_streaming(
        self, messages: List[Dict[str, str]], generation_kwargs: Dict[str, Any], streaming_callback: StreamingCallbackT
    ):
        api_output: Iterable[ChatCompletionStreamOutput] = self._client.chat_completion(
            messages,
            stream=True,
            stream_options=ChatCompletionInputStreamOptions(include_usage=True),
            **generation_kwargs,
        )

        component_info = ComponentInfo.from_component(self)
        streaming_chunks = []
        for chunk in api_output:
            streaming_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
                chunk=chunk, component_info=component_info
            )
            streaming_chunks.append(streaming_chunk)
            streaming_callback(streaming_chunk)

        message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
        if message.meta.get("usage") is None:
            message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}

        return {"replies": [message]}

    def _run_non_streaming(
        self,
        messages: List[Dict[str, str]],
        generation_kwargs: Dict[str, Any],
        tools: Optional[List["ChatCompletionInputTool"]] = None,
    ) -> Dict[str, List[ChatMessage]]:
        api_chat_output: ChatCompletionOutput = self._client.chat_completion(
            messages=messages, tools=tools, **generation_kwargs
        )

        if len(api_chat_output.choices) == 0:
            return {"replies": []}

        # the `n` argument is unused, so the API always returns only one choice
        # the argument is probably allowed for compatibility with OpenAI
        # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
        choice = api_chat_output.choices[0]

        text = choice.message.content

        tool_calls = _convert_hfapi_tool_calls(choice.message.tool_calls)

        meta: Dict[str, Any] = {
            "model": self._client.model,
            "finish_reason": choice.finish_reason,
            "index": choice.index,
        }

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        if api_chat_output.usage:
            usage = {
                "prompt_tokens": api_chat_output.usage.prompt_tokens,
                "completion_tokens": api_chat_output.usage.completion_tokens,
            }
        meta["usage"] = usage

        message = ChatMessage.from_assistant(text=text, tool_calls=tool_calls, meta=meta)
        return {"replies": [message]}

    async def _run_streaming_async(
        self, messages: List[Dict[str, str]], generation_kwargs: Dict[str, Any], streaming_callback: StreamingCallbackT
    ):
        api_output: AsyncIterable[ChatCompletionStreamOutput] = await self._async_client.chat_completion(
            messages,
            stream=True,
            stream_options=ChatCompletionInputStreamOptions(include_usage=True),
            **generation_kwargs,
        )

        component_info = ComponentInfo.from_component(self)
        streaming_chunks = []
        async for chunk in api_output:
            stream_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
                chunk=chunk, component_info=component_info
            )
            streaming_chunks.append(stream_chunk)
            await streaming_callback(stream_chunk)  # type: ignore

        message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
        if message.meta.get("usage") is None:
            message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}

        return {"replies": [message]}

    async def _run_non_streaming_async(
        self,
        messages: List[Dict[str, str]],
        generation_kwargs: Dict[str, Any],
        tools: Optional[List["ChatCompletionInputTool"]] = None,
    ) -> Dict[str, List[ChatMessage]]:
        api_chat_output: ChatCompletionOutput = await self._async_client.chat_completion(
            messages=messages, tools=tools, **generation_kwargs
        )

        if len(api_chat_output.choices) == 0:
            return {"replies": []}

        choice = api_chat_output.choices[0]

        text = choice.message.content

        tool_calls = _convert_hfapi_tool_calls(choice.message.tool_calls)

        meta: Dict[str, Any] = {
            "model": self._async_client.model,
            "finish_reason": choice.finish_reason,
            "index": choice.index,
        }

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        if api_chat_output.usage:
            usage = {
                "prompt_tokens": api_chat_output.usage.prompt_tokens,
                "completion_tokens": api_chat_output.usage.completion_tokens,
            }
        meta["usage"] = usage

        message = ChatMessage.from_assistant(text=text, tool_calls=tool_calls, meta=meta)
        return {"replies": [message]}