
deepset-ai / haystack · build 16564070890 (push, via GitHub)

28 Jul 2025 08:26AM UTC coverage: 91.926% (+1.1%) from 90.802%
fix(embeddings): add `encoding_format` keyword argument when calling OpenAI's `client.embeddings.create` (#9655)

* fix mypy

Co-authored-by: anakin87 <stefanofiorucci@gmail.com>
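For context on the commit above: `encoding_format` is the parameter of OpenAI's embeddings endpoint that controls whether vectors come back as floats or base64. A minimal sketch of the kind of call the commit describes, assuming the `openai` Python SDK (the model name and input are illustrative, not taken from the PR):

```python
# Sketch of an embeddings call that passes encoding_format explicitly,
# as the commit describes. Assumes the `openai` SDK; model and input
# are illustrative.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=["What's Natural Language Processing?"],
    encoding_format="float",  # explicit instead of relying on the default
)
print(len(response.data[0].embedding))
```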

12785 of 13908 relevant lines covered (91.93%)

0.92 hits per line
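A quick arithmetic check of the figures above ("hits per line" is the average execution count per relevant line, which sits below 1.0 when some lines are never hit):

```python
# Quick check of the coverage figures reported above.
covered, relevant = 12785, 13908
print(f"{covered / relevant:.2%}")  # -> 91.93%
```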

Source File: haystack/components/generators/hugging_face_api.py (96.51% covered)

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from dataclasses import asdict
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional, Union, cast

from haystack import component, default_from_dict, default_to_dict, logging
from haystack.dataclasses import (
    ComponentInfo,
    FinishReason,
    StreamingCallbackT,
    StreamingChunk,
    SyncStreamingCallbackT,
    select_streaming_callback,
)
from haystack.lazy_imports import LazyImport
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model
from haystack.utils.url_validation import is_valid_http_url

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.27.0\"'") as huggingface_hub_import:
    from huggingface_hub import (
        InferenceClient,
        TextGenerationOutput,
        TextGenerationStreamOutput,
        TextGenerationStreamOutputToken,
    )


logger = logging.getLogger(__name__)


@component
class HuggingFaceAPIGenerator:
    """
    Generates text using Hugging Face APIs.

    Use it with the following Hugging Face APIs:
    - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
    - [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference)

    **Note:** As of July 2025, the Hugging Face Inference API no longer offers generative models through the
    `text_generation` endpoint. Generative models are now only available through providers supporting the
    `chat_completion` endpoint. As a result, this component might no longer work with the Hugging Face Inference API.
    Use the `HuggingFaceAPIChatGenerator` component, which supports the `chat_completion` endpoint.

    ### Usage examples

    #### With Hugging Face Inference Endpoints

    ```python
    from haystack.components.generators import HuggingFaceAPIGenerator
    from haystack.utils import Secret

    generator = HuggingFaceAPIGenerator(api_type="inference_endpoints",
                                        api_params={"url": "<your-inference-endpoint-url>"},
                                        token=Secret.from_token("<your-api-key>"))

    result = generator.run(prompt="What's Natural Language Processing?")
    print(result)
    ```

    #### With self-hosted text generation inference

    ```python
    from haystack.components.generators import HuggingFaceAPIGenerator

    generator = HuggingFaceAPIGenerator(api_type="text_generation_inference",
                                        api_params={"url": "http://localhost:8080"})

    result = generator.run(prompt="What's Natural Language Processing?")
    print(result)
    ```

    #### With the free serverless inference API

    Be aware that this example might not work, as the Hugging Face Inference API no longer offers models that
    support the `text_generation` endpoint. Use the `HuggingFaceAPIChatGenerator` for generative models through
    the `chat_completion` endpoint.

    ```python
    from haystack.components.generators import HuggingFaceAPIGenerator
    from haystack.utils import Secret

    generator = HuggingFaceAPIGenerator(api_type="serverless_inference_api",
                                        api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
                                        token=Secret.from_token("<your-api-key>"))

    result = generator.run(prompt="What's Natural Language Processing?")
    print(result)
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        api_type: Union[HFGenerationAPIType, str],
        api_params: Dict[str, str],
        token: Optional[Secret] = Secret.from_env_var(["HF_API_TOKEN", "HF_TOKEN"], strict=False),
        generation_kwargs: Optional[Dict[str, Any]] = None,
        stop_words: Optional[List[str]] = None,
        streaming_callback: Optional[StreamingCallbackT] = None,
    ):
        """
        Initialize the HuggingFaceAPIGenerator instance.

        :param api_type:
            The type of Hugging Face API to use. Available types:
            - `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference).
            - `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints).
            - `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api).
              This might no longer work due to changes in the models offered in the Hugging Face Inference API.
              Please use the `HuggingFaceAPIChatGenerator` component instead.
        :param api_params:
            A dictionary with the following keys:
            - `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
            - `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
              `TEXT_GENERATION_INFERENCE`.
            - Other parameters specific to the chosen API type, such as `timeout`, `headers`, `provider`, etc.
        :param token: The Hugging Face token to use as HTTP bearer authorization.
            Check your HF token in your [account settings](https://huggingface.co/settings/tokens).
        :param generation_kwargs:
            A dictionary with keyword arguments to customize text generation. Some examples: `max_new_tokens`,
            `temperature`, `top_k`, `top_p`.
            For details, see the [Hugging Face documentation](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation).
        :param stop_words: An optional list of strings representing the stop words.
        :param streaming_callback: An optional callable for handling streaming responses.
        """

        huggingface_hub_import.check()

        if isinstance(api_type, str):
            api_type = HFGenerationAPIType.from_str(api_type)

        if api_type == HFGenerationAPIType.SERVERLESS_INFERENCE_API:
            logger.warning(
                "Due to changes in the models offered in Hugging Face Inference API, using this component with the "
                "Serverless Inference API might no longer work. "
                "Please use the `HuggingFaceAPIChatGenerator` component instead."
            )
            model = api_params.get("model")
            if model is None:
                raise ValueError(
                    "To use the Serverless Inference API, you need to specify the `model` parameter in `api_params`."
                )
            check_valid_model(model, HFModelType.GENERATION, token)
            model_or_url = model
        elif api_type in [HFGenerationAPIType.INFERENCE_ENDPOINTS, HFGenerationAPIType.TEXT_GENERATION_INFERENCE]:
            url = api_params.get("url")
            if url is None:
                msg = (
                    "To use Text Generation Inference or Inference Endpoints, you need to specify the `url` "
                    "parameter in `api_params`."
                )
                raise ValueError(msg)
            if not is_valid_http_url(url):
                raise ValueError(f"Invalid URL: {url}")
            model_or_url = url
        else:
            msg = f"Unknown api_type {api_type}"  # not covered by tests
            raise ValueError(msg)  # not covered by tests

        # handle generation kwargs setup
        generation_kwargs = generation_kwargs.copy() if generation_kwargs else {}
        generation_kwargs["stop_sequences"] = generation_kwargs.get("stop_sequences", [])
        generation_kwargs["stop_sequences"].extend(stop_words or [])
        generation_kwargs.setdefault("max_new_tokens", 512)

        self.api_type = api_type
        self.api_params = api_params
        self.token = token
        self.generation_kwargs = generation_kwargs
        self.streaming_callback = streaming_callback

        resolved_api_params: Dict[str, Any] = {k: v for k, v in api_params.items() if k != "model" and k != "url"}
        self._client = InferenceClient(
            model_or_url, token=token.resolve_value() if token else None, **resolved_api_params
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            A dictionary containing the serialized component.
        """
        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
        return default_to_dict(
            self,
            api_type=str(self.api_type),
            api_params=self.api_params,
            token=self.token.to_dict() if self.token else None,
            generation_kwargs=self.generation_kwargs,
            streaming_callback=callback_name,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceAPIGenerator":
        """
        Deserialize this component from a dictionary.
        """
        deserialize_secrets_inplace(data["init_parameters"], keys=["token"])
        init_params = data["init_parameters"]
        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            init_params["streaming_callback"] = deserialize_callable(serialized_callback_handler)
        return default_from_dict(cls, data)

    @component.output_types(replies=List[str], meta=List[Dict[str, Any]])
    def run(
        self,
        prompt: str,
        streaming_callback: Optional[StreamingCallbackT] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
    ):
        """
        Invoke the text generation inference for the given prompt and generation parameters.

        :param prompt:
            A string representing the prompt.
        :param streaming_callback:
            A callback function that is called when a new token is received from the stream.
        :param generation_kwargs:
            Additional keyword arguments for text generation.
        :returns:
            A dictionary with the generated replies and metadata. Both are lists of length n.
            - replies: A list of strings representing the generated replies.
            - meta: A list of dictionaries containing the metadata for each reply.
        """
        # update generation kwargs by merging with the default ones
        generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}

        # check if streaming_callback is passed
        streaming_callback = select_streaming_callback(
            init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=False
        )

        hf_output = self._client.text_generation(
            prompt, details=True, stream=streaming_callback is not None, **generation_kwargs
        )

        if streaming_callback is not None:
            # mypy doesn't know that hf_output is an Iterable[TextGenerationStreamOutput], so we cast it
            return self._stream_and_build_response(
                hf_output=cast(Iterable[TextGenerationStreamOutput], hf_output), streaming_callback=streaming_callback
            )

        # mypy doesn't know that hf_output is a TextGenerationOutput, so we cast it
        return self._build_non_streaming_response(cast(TextGenerationOutput, hf_output))

    def _stream_and_build_response(
        self, hf_output: Iterable["TextGenerationStreamOutput"], streaming_callback: SyncStreamingCallbackT
    ):
        chunks: List[StreamingChunk] = []
        first_chunk_time = None

        component_info = ComponentInfo.from_component(self)
        for chunk in hf_output:
            token: TextGenerationStreamOutputToken = chunk.token
            if token.special:
                continue  # not covered by tests

            chunk_metadata = {**asdict(token), **(asdict(chunk.details) if chunk.details else {})}
            if first_chunk_time is None:
                first_chunk_time = datetime.now().isoformat()

            mapping: Dict[str, FinishReason] = {
                "length": "length",  # Direct match
                "eos_token": "stop",  # EOS token means natural stop
                "stop_sequence": "stop",  # Stop sequence means natural stop
            }
            mapped_finish_reason = (
                mapping.get(chunk_metadata["finish_reason"], "stop") if chunk_metadata.get("finish_reason") else None
            )
            stream_chunk = StreamingChunk(
                content=token.text,
                meta=chunk_metadata,
                component_info=component_info,
                index=0,
                start=len(chunks) == 0,
                finish_reason=mapped_finish_reason,
            )
            chunks.append(stream_chunk)
            streaming_callback(stream_chunk)

        metadata = {
            "finish_reason": chunks[-1].meta.get("finish_reason", None),
            "model": self._client.model,
            "usage": {"completion_tokens": chunks[-1].meta.get("generated_tokens", 0)},
            "completion_start_time": first_chunk_time,
        }
        return {"replies": ["".join([chunk.content for chunk in chunks])], "meta": [metadata]}

    def _build_non_streaming_response(self, hf_output: "TextGenerationOutput"):
        meta = [
            {
                "model": self._client.model,
                "finish_reason": hf_output.details.finish_reason if hf_output.details else None,
                "usage": {"completion_tokens": len(hf_output.details.tokens) if hf_output.details else 0},
            }
        ]
        return {"replies": [hf_output.generated_text], "meta": meta}
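
To tie the pieces above together, here is a minimal usage sketch, assuming a self-hosted TGI endpoint at a placeholder URL. It exercises the streaming path (`_stream_and_build_response`) and the `to_dict`/`from_dict` round trip; `print_chunk` is an illustrative callback, not part of the library:

```python
# Minimal usage sketch for HuggingFaceAPIGenerator; the endpoint URL is a
# placeholder and print_chunk is an illustrative callback.
from haystack.components.generators import HuggingFaceAPIGenerator
from haystack.dataclasses import StreamingChunk


def print_chunk(chunk: StreamingChunk) -> None:
    # Invoked once per streamed token; see _stream_and_build_response above.
    print(chunk.content, end="", flush=True)


generator = HuggingFaceAPIGenerator(
    api_type="text_generation_inference",
    api_params={"url": "http://localhost:8080"},
    streaming_callback=print_chunk,
)

# Round trip through to_dict/from_dict: the streaming callback is serialized
# by import path and re-resolved on load, and the token stays a Secret.
restored = HuggingFaceAPIGenerator.from_dict(generator.to_dict())

result = restored.run(prompt="What's Natural Language Processing?")
print()
print(result["meta"][0])  # model, finish_reason, usage, completion_start_time
```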