deepset-ai / haystack / build 15272575453

27 May 2025 10:12AM UTC coverage: 90.201% (+0.005%) from 90.196%

Pull Request #9426: feat: add component name and type to `StreamingChunk`
Merge 449109f4a into 085c3add4

11405 of 12644 relevant lines covered (90.2%)
0.9 hits per line

Source file (94.0% of lines covered):
haystack/components/generators/chat/hugging_face_api.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json
from datetime import datetime
from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Union

from haystack import component, default_from_dict, default_to_dict, logging
from haystack.dataclasses import ChatMessage, ComponentInfo, StreamingChunk, ToolCall, select_streaming_callback
from haystack.dataclasses.streaming_chunk import StreamingCallbackT
from haystack.lazy_imports import LazyImport
from haystack.tools import (
    Tool,
    Toolset,
    _check_duplicate_tool_names,
    deserialize_tools_or_toolset_inplace,
    serialize_tools_or_toolset,
)
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model, convert_message_to_hf_format
from haystack.utils.url_validation import is_valid_http_url

logger = logging.getLogger(__name__)

with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.27.0\"'") as huggingface_hub_import:
    from huggingface_hub import (
        AsyncInferenceClient,
        ChatCompletionInputFunctionDefinition,
        ChatCompletionInputStreamOptions,
        ChatCompletionInputTool,
        ChatCompletionOutput,
        ChatCompletionOutputToolCall,
        ChatCompletionStreamOutput,
        InferenceClient,
    )


def _convert_hfapi_tool_calls(hfapi_tool_calls: Optional[List["ChatCompletionOutputToolCall"]]) -> List[ToolCall]:
    """
    Convert HuggingFace API tool calls to a list of Haystack ToolCall.

    :param hfapi_tool_calls: The HuggingFace API tool calls to convert.
    :returns: A list of ToolCall objects.
    """
    if not hfapi_tool_calls:
        return []

    tool_calls = []

    for hfapi_tc in hfapi_tool_calls:
        hf_arguments = hfapi_tc.function.arguments

        arguments = None
        if isinstance(hf_arguments, dict):
            arguments = hf_arguments
        elif isinstance(hf_arguments, str):
            try:
                arguments = json.loads(hf_arguments)
            except json.JSONDecodeError:
                logger.warning(
                    "HuggingFace API returned a malformed JSON string for tool call arguments. This tool call "
                    "will be skipped. Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                    _id=hfapi_tc.id,
                    _name=hfapi_tc.function.name,
                    _arguments=hf_arguments,
                )
        else:
            logger.warning(
                "HuggingFace API returned tool call arguments of type {_type}. Valid types are dict and str. This tool "
                "call will be skipped. Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _type=type(hf_arguments),
                _id=hfapi_tc.id,
                _name=hfapi_tc.function.name,
                _arguments=hf_arguments,
            )

        if arguments:
            tool_calls.append(ToolCall(tool_name=hfapi_tc.function.name, arguments=arguments, id=hfapi_tc.id))

    return tool_calls


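# Illustrative sketch (an assumption, not part of the original file): given a
# HF API tool call whose `function.arguments` arrive as the JSON string
# '{"city": "Paris"}', the helper above parses the string and yields roughly
#
#   ToolCall(tool_name="get_weather", arguments={"city": "Paris"}, id="call_1")
#
# where "get_weather" and "call_1" are hypothetical values. Malformed JSON
# strings and unexpected argument types are logged and the tool call is skipped.

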
def _convert_tools_to_hfapi_tools(
    tools: Optional[Union[List[Tool], Toolset]],
) -> Optional[List["ChatCompletionInputTool"]]:
    if not tools:
        return None

    # huggingface_hub<0.31.0 uses "arguments", huggingface_hub>=0.31.0 uses "parameters"
    parameters_name = "arguments" if hasattr(ChatCompletionInputFunctionDefinition, "arguments") else "parameters"

    hf_tools = []
    for tool in tools:
        hf_tools_args = {"name": tool.name, "description": tool.description, parameters_name: tool.parameters}

        hf_tools.append(
            ChatCompletionInputTool(function=ChatCompletionInputFunctionDefinition(**hf_tools_args), type="function")
        )

    return hf_tools


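# Illustrative sketch (an assumption, not part of the original file): for a
# hypothetical Tool(name="get_weather", description="Get the weather",
# parameters={"type": "object", "properties": {"city": {"type": "string"}}}),
# the helper above builds roughly
#
#   ChatCompletionInputTool(
#       function=ChatCompletionInputFunctionDefinition(
#           name="get_weather",
#           description="Get the weather",
#           parameters={...},  # passed as `arguments=` on huggingface_hub<0.31.0
#       ),
#       type="function",
#   )

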
@component
class HuggingFaceAPIChatGenerator:
    """
    Completes chats using Hugging Face APIs.

    HuggingFaceAPIChatGenerator uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format for input and output. Use it to generate text with Hugging Face APIs:
    - [Free Serverless Inference API](https://huggingface.co/inference-api)
    - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
    - [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference)

    ### Usage examples

    #### With the free serverless inference API

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.utils import Secret
    from haystack.utils.hf import HFGenerationAPIType

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    # the api_type can be expressed using the HFGenerationAPIType enum or as a string
    api_type = HFGenerationAPIType.SERVERLESS_INFERENCE_API
    api_type = "serverless_inference_api" # this is equivalent to the above

    generator = HuggingFaceAPIChatGenerator(api_type=api_type,
                                            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
                                            token=Secret.from_token("<your-api-key>"))

    result = generator.run(messages)
    print(result)
    ```

    #### With paid inference endpoints

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.utils import Secret

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    generator = HuggingFaceAPIChatGenerator(api_type="inference_endpoints",
                                            api_params={"url": "<your-inference-endpoint-url>"},
                                            token=Secret.from_token("<your-api-key>"))

    result = generator.run(messages)
    print(result)
    ```

    #### With self-hosted text generation inference

    ```python
    from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
    from haystack.dataclasses import ChatMessage

    messages = [ChatMessage.from_system("\\nYou are a helpful, respectful and honest assistant"),
                ChatMessage.from_user("What's Natural Language Processing?")]

    generator = HuggingFaceAPIChatGenerator(api_type="text_generation_inference",
                                            api_params={"url": "http://localhost:8080"})

    result = generator.run(messages)
    print(result)
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        api_type: Union[HFGenerationAPIType, str],
        api_params: Dict[str, str],
        token: Optional[Secret] = Secret.from_env_var(["HF_API_TOKEN", "HF_TOKEN"], strict=False),
        generation_kwargs: Optional[Dict[str, Any]] = None,
        stop_words: Optional[List[str]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
    ):
        """
        Initialize the HuggingFaceAPIChatGenerator instance.

        :param api_type:
            The type of Hugging Face API to use. Available types:
            - `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference).
            - `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints).
            - `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api).
        :param api_params:
            A dictionary with the following keys:
            - `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
            - `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
            `TEXT_GENERATION_INFERENCE`.
        :param token:
            The Hugging Face token to use as HTTP bearer authorization.
            Check your HF token in your [account settings](https://huggingface.co/settings/tokens).
        :param generation_kwargs:
            A dictionary with keyword arguments to customize text generation.
            Some examples: `max_tokens`, `temperature`, `top_p`.
            For details, see [Hugging Face chat_completion documentation](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
        :param stop_words:
            An optional list of strings representing the stop words.
        :param streaming_callback:
            An optional callable for handling streaming responses.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls.
            The chosen model should support tool/function calling, according to the model card.
            Support for tools in the Hugging Face API and TGI is not yet fully refined and you may experience
            unexpected behavior. This parameter can accept either a list of `Tool` objects or a `Toolset` instance.
        """

        huggingface_hub_import.check()

        if isinstance(api_type, str):
            api_type = HFGenerationAPIType.from_str(api_type)

        if api_type == HFGenerationAPIType.SERVERLESS_INFERENCE_API:
            model = api_params.get("model")
            if model is None:
                raise ValueError(
                    "To use the Serverless Inference API, you need to specify the `model` parameter in `api_params`."
                )
            check_valid_model(model, HFModelType.GENERATION, token)
            model_or_url = model
        elif api_type in [HFGenerationAPIType.INFERENCE_ENDPOINTS, HFGenerationAPIType.TEXT_GENERATION_INFERENCE]:
            url = api_params.get("url")
            if url is None:
                msg = (
                    "To use Text Generation Inference or Inference Endpoints, you need to specify the `url` parameter "
                    "in `api_params`."
                )
                raise ValueError(msg)
            if not is_valid_http_url(url):
                raise ValueError(f"Invalid URL: {url}")
            model_or_url = url
        else:
            msg = f"Unknown api_type {api_type}"
            raise ValueError(msg)

        if tools and streaming_callback is not None:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(tools or []))

        # handle generation kwargs setup
        generation_kwargs = generation_kwargs.copy() if generation_kwargs else {}
        generation_kwargs["stop"] = generation_kwargs.get("stop", [])
        generation_kwargs["stop"].extend(stop_words or [])
        generation_kwargs.setdefault("max_tokens", 512)

        self.api_type = api_type
        self.api_params = api_params
        self.token = token
        self.generation_kwargs = generation_kwargs
        self.streaming_callback = streaming_callback
        self._client = InferenceClient(model_or_url, token=token.resolve_value() if token else None)
        self._async_client = AsyncInferenceClient(model_or_url, token=token.resolve_value() if token else None)
        self.tools = tools

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            A dictionary containing the serialized component.
        """
        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
        return default_to_dict(
            self,
            api_type=str(self.api_type),
            api_params=self.api_params,
            token=self.token.to_dict() if self.token else None,
            generation_kwargs=self.generation_kwargs,
            streaming_callback=callback_name,
            tools=serialize_tools_or_toolset(self.tools),
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceAPIChatGenerator":
        """
        Deserialize this component from a dictionary.
        """
        deserialize_secrets_inplace(data["init_parameters"], keys=["token"])
        deserialize_tools_or_toolset_inplace(data["init_parameters"], key="tools")
        init_params = data.get("init_parameters", {})
        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)
        return default_from_dict(cls, data)

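    # Illustrative round-trip (an assumption, not part of the original file):
    #
    #   generator = HuggingFaceAPIChatGenerator(
    #       api_type="serverless_inference_api",
    #       api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
    #   )
    #   restored = HuggingFaceAPIChatGenerator.from_dict(generator.to_dict())
    #
    # to_dict() stores the streaming callback as an importable path and the
    # token as a serialized Secret; from_dict() reverses both before calling
    # default_from_dict.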

    @component.output_types(replies=List[ChatMessage])
    def run(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
    ):
        """
        Invoke the text generation inference based on the provided messages and generation parameters.

        :param messages:
            A list of ChatMessage objects representing the input messages.
        :param generation_kwargs:
            Additional keyword arguments for text generation.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls. If set, it will override
            the `tools` parameter set during component initialization. This parameter can accept either a
            list of `Tool` objects or a `Toolset` instance.
        :param streaming_callback:
            An optional callable for handling streaming responses. If set, it will override the `streaming_callback`
            parameter set during component initialization.
        :returns: A dictionary with the following keys:
            - `replies`: A list containing the generated responses as ChatMessage objects.
        """

        # update generation kwargs by merging with the default ones
        generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}

        formatted_messages = [convert_message_to_hf_format(message) for message in messages]

        tools = tools or self.tools
        if tools and self.streaming_callback:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(tools or []))

        # validate and select the streaming callback
        streaming_callback = select_streaming_callback(
            self.streaming_callback, streaming_callback, requires_async=False
        )

        if streaming_callback:
            return self._run_streaming(formatted_messages, generation_kwargs, streaming_callback)

        if tools and isinstance(tools, Toolset):
            tools = list(tools)

        hf_tools = _convert_tools_to_hfapi_tools(tools)

        return self._run_non_streaming(formatted_messages, generation_kwargs, hf_tools)
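    # Illustrative sketch (an assumption, not part of the original file): when
    # `tools` are provided, any calls the model prepares surface on the reply,
    # e.g. with a hypothetical `weather_tool`:
    #
    #   result = generator.run(messages, tools=[weather_tool])
    #   for tool_call in result["replies"][0].tool_calls:
    #       print(tool_call.tool_name, tool_call.arguments)
    #
    # Combining `tools` with a streaming callback raises a ValueError (see above).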

    @component.output_types(replies=List[ChatMessage])
    async def run_async(
        self,
        messages: List[ChatMessage],
        generation_kwargs: Optional[Dict[str, Any]] = None,
        tools: Optional[Union[List[Tool], Toolset]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
    ):
        """
        Asynchronously invokes the text generation inference based on the provided messages and generation parameters.

        This is the asynchronous version of the `run` method. It has the same parameters
        and return values but can be used with `await` in async code.

        :param messages:
            A list of ChatMessage objects representing the input messages.
        :param generation_kwargs:
            Additional keyword arguments for text generation.
        :param tools:
            A list of tools or a Toolset for which the model can prepare calls. If set, it will override the `tools`
            parameter set during component initialization. This parameter can accept either a list of `Tool` objects
            or a `Toolset` instance.
        :param streaming_callback:
            An optional callable for handling streaming responses. If set, it will override the `streaming_callback`
            parameter set during component initialization.
        :returns: A dictionary with the following keys:
            - `replies`: A list containing the generated responses as ChatMessage objects.
        """

        # update generation kwargs by merging with the default ones
        generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}

        formatted_messages = [convert_message_to_hf_format(message) for message in messages]

        tools = tools or self.tools
        if tools and self.streaming_callback:
            raise ValueError("Using tools and streaming at the same time is not supported. Please choose one.")
        _check_duplicate_tool_names(list(tools or []))

        # validate and select the streaming callback
        streaming_callback = select_streaming_callback(self.streaming_callback, streaming_callback, requires_async=True)

        if streaming_callback:
            return await self._run_streaming_async(formatted_messages, generation_kwargs, streaming_callback)

        if tools and isinstance(tools, Toolset):
            tools = list(tools)

        hf_tools = _convert_tools_to_hfapi_tools(tools)

        return await self._run_non_streaming_async(formatted_messages, generation_kwargs, hf_tools)

    def _run_streaming(
        self, messages: List[Dict[str, str]], generation_kwargs: Dict[str, Any], streaming_callback: StreamingCallbackT
    ):
        api_output: Iterable[ChatCompletionStreamOutput] = self._client.chat_completion(
            messages,
            stream=True,
            stream_options=ChatCompletionInputStreamOptions(include_usage=True),
            **generation_kwargs,
        )

        generated_text = ""
        first_chunk_time = None
        finish_reason = None
        usage = None
        meta: Dict[str, Any] = {}

        # get the component name and type
        component_info = ComponentInfo.from_component(self)

        # iterate over the streamed chunks
        for chunk in api_output:
            # The chunk with usage returns an empty array for choices
            if len(chunk.choices) > 0:
                # n is unused, so the API always returns only one choice
                # the argument is probably allowed for compatibility with OpenAI
                # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
                choice = chunk.choices[0]

                text = choice.delta.content or ""
                generated_text += text

                if choice.finish_reason:
                    finish_reason = choice.finish_reason

                stream_chunk = StreamingChunk(content=text, meta=meta, component_info=component_info)
                streaming_callback(stream_chunk)

            if chunk.usage:
                usage = chunk.usage

            if first_chunk_time is None:
                first_chunk_time = datetime.now().isoformat()

        if usage:
            usage_dict = {"prompt_tokens": usage.prompt_tokens, "completion_tokens": usage.completion_tokens}
        else:
            usage_dict = {"prompt_tokens": 0, "completion_tokens": 0}

        meta.update(
            {
                "model": self._client.model,
                "index": 0,
                "finish_reason": finish_reason,
                "usage": usage_dict,
                "completion_start_time": first_chunk_time,
            }
        )

        message = ChatMessage.from_assistant(text=generated_text, meta=meta)
        return {"replies": [message]}

    def _run_non_streaming(
        self,
        messages: List[Dict[str, str]],
        generation_kwargs: Dict[str, Any],
        tools: Optional[List["ChatCompletionInputTool"]] = None,
    ) -> Dict[str, List[ChatMessage]]:
        api_chat_output: ChatCompletionOutput = self._client.chat_completion(
            messages=messages, tools=tools, **generation_kwargs
        )

        if len(api_chat_output.choices) == 0:
            return {"replies": []}

        # n is unused, so the API always returns only one choice
        # the argument is probably allowed for compatibility with OpenAI
        # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
        choice = api_chat_output.choices[0]

        text = choice.message.content

        tool_calls = _convert_hfapi_tool_calls(choice.message.tool_calls)

        meta: Dict[str, Any] = {
            "model": self._client.model,
            "finish_reason": choice.finish_reason,
            "index": choice.index,
        }

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        if api_chat_output.usage:
            usage = {
                "prompt_tokens": api_chat_output.usage.prompt_tokens,
                "completion_tokens": api_chat_output.usage.completion_tokens,
            }
        meta["usage"] = usage

        message = ChatMessage.from_assistant(text=text, tool_calls=tool_calls, meta=meta)
        return {"replies": [message]}

    async def _run_streaming_async(
        self, messages: List[Dict[str, str]], generation_kwargs: Dict[str, Any], streaming_callback: StreamingCallbackT
    ):
        api_output: AsyncIterable[ChatCompletionStreamOutput] = await self._async_client.chat_completion(
            messages,
            stream=True,
            stream_options=ChatCompletionInputStreamOptions(include_usage=True),
            **generation_kwargs,
        )

        generated_text = ""
        first_chunk_time = None
        finish_reason = None
        usage = None
        meta: Dict[str, Any] = {}

        # get the component name and type
        component_info = ComponentInfo.from_component(self)

        async for chunk in api_output:
            # The chunk with usage returns an empty array for choices
            if len(chunk.choices) > 0:
                # n is unused, so the API always returns only one choice
                # the argument is probably allowed for compatibility with OpenAI
                # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
                choice = chunk.choices[0]

                text = choice.delta.content or ""
                generated_text += text

                if choice.finish_reason:
                    finish_reason = choice.finish_reason

                stream_chunk = StreamingChunk(content=text, meta=meta, component_info=component_info)
                await streaming_callback(stream_chunk)  # type: ignore

            if chunk.usage:
                usage = chunk.usage

            if first_chunk_time is None:
                first_chunk_time = datetime.now().isoformat()

        if usage:
            usage_dict = {"prompt_tokens": usage.prompt_tokens, "completion_tokens": usage.completion_tokens}
        else:
            usage_dict = {"prompt_tokens": 0, "completion_tokens": 0}

        meta.update(
            {
                "model": self._async_client.model,
                "index": 0,
                "finish_reason": finish_reason,
                "usage": usage_dict,
                "completion_start_time": first_chunk_time,
            }
        )

        message = ChatMessage.from_assistant(text=generated_text, meta=meta)
        return {"replies": [message]}

    async def _run_non_streaming_async(
        self,
        messages: List[Dict[str, str]],
        generation_kwargs: Dict[str, Any],
        tools: Optional[List["ChatCompletionInputTool"]] = None,
    ) -> Dict[str, List[ChatMessage]]:
        api_chat_output: ChatCompletionOutput = await self._async_client.chat_completion(
            messages=messages, tools=tools, **generation_kwargs
        )

        if len(api_chat_output.choices) == 0:
            return {"replies": []}

        choice = api_chat_output.choices[0]

        text = choice.message.content

        tool_calls = _convert_hfapi_tool_calls(choice.message.tool_calls)

        meta: Dict[str, Any] = {
            "model": self._async_client.model,
            "finish_reason": choice.finish_reason,
            "index": choice.index,
        }

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        if api_chat_output.usage:
            usage = {
                "prompt_tokens": api_chat_output.usage.prompt_tokens,
                "completion_tokens": api_chat_output.usage.completion_tokens,
            }
        meta["usage"] = usage

        message = ChatMessage.from_assistant(text=text, tool_calls=tool_calls, meta=meta)
        return {"replies": [message]}
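A minimal streaming sketch against this component (an illustration, not taken from the file above): it assumes a valid token in the HF_API_TOKEN environment variable, and that the `ComponentInfo` this PR attaches to each `StreamingChunk` exposes `name` and `type` fields.

```python
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.dataclasses import ChatMessage, StreamingChunk

seen = set()


def show_chunk(chunk: StreamingChunk) -> None:
    # Each chunk carries info about the emitting component (the feature this
    # PR adds); `name` and `type` are assumed attribute names on ComponentInfo.
    if chunk.component_info and chunk.component_info.type not in seen:
        seen.add(chunk.component_info.type)
        print(f"--- streaming from {chunk.component_info.type} ---")
    print(chunk.content, end="", flush=True)


generator = HuggingFaceAPIChatGenerator(
    api_type="serverless_inference_api",
    api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
    streaming_callback=show_chunk,
)

result = generator.run([ChatMessage.from_user("What's Natural Language Processing?")])
print()
print(result["replies"][0].meta["usage"])
```

Tools are omitted from the sketch because the component rejects combining `tools` with a streaming callback.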