
deepset-ai / haystack, build 13972131258 (20 Mar 2025 02:43PM UTC)
Pull Request #9069: refactor!: `ChatMessage` serialization-deserialization updates (merge 8371761b0 into 67ab3788e, committed via GitHub web-flow)
Overall coverage: 90.021% (-0.03% from 90.054%); 9833 of 10923 relevant lines covered, 0.9 hits per line

Source file: haystack/components/embedders/hugging_face_api_text_embedder.py (97.1% of lines covered; the only uncovered lines are the unknown `api_type` fallback in `__init__`)
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import warnings
from typing import Any, Dict, List, Optional, Union

from haystack import component, default_from_dict, default_to_dict
from haystack.lazy_imports import LazyImport
from haystack.utils import Secret, deserialize_secrets_inplace
from haystack.utils.hf import HFEmbeddingAPIType, HFModelType, check_valid_model
from haystack.utils.url_validation import is_valid_http_url

with LazyImport(message="Run 'pip install \"huggingface_hub>=0.27.0\"'") as huggingface_hub_import:
    from huggingface_hub import InferenceClient


@component
class HuggingFaceAPITextEmbedder:
    """
    Embeds strings using Hugging Face APIs.

    Use it with the following Hugging Face APIs:
    - [Free Serverless Inference API](https://huggingface.co/inference-api)
    - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
    - [Self-hosted Text Embeddings Inference](https://github.com/huggingface/text-embeddings-inference)

    ### Usage examples

    #### With free serverless inference API

    ```python
    from haystack.components.embedders import HuggingFaceAPITextEmbedder
    from haystack.utils import Secret

    text_embedder = HuggingFaceAPITextEmbedder(api_type="serverless_inference_api",
                                               api_params={"model": "BAAI/bge-small-en-v1.5"},
                                               token=Secret.from_token("<your-api-key>"))

    print(text_embedder.run("I love pizza!"))

    # {'embedding': [0.017020374536514282, -0.023255806416273117, ...],
    ```

    #### With paid inference endpoints

    ```python
    from haystack.components.embedders import HuggingFaceAPITextEmbedder
    from haystack.utils import Secret

    text_embedder = HuggingFaceAPITextEmbedder(api_type="inference_endpoints",
                                               api_params={"url": "<your-inference-endpoint-url>"},
                                               token=Secret.from_token("<your-api-key>"))

    print(text_embedder.run("I love pizza!"))

    # {'embedding': [0.017020374536514282, -0.023255806416273117, ...],
    ```

    #### With self-hosted text embeddings inference

    ```python
    from haystack.components.embedders import HuggingFaceAPITextEmbedder
    from haystack.utils import Secret

    text_embedder = HuggingFaceAPITextEmbedder(api_type="text_embeddings_inference",
                                               api_params={"url": "http://localhost:8080"})

    print(text_embedder.run("I love pizza!"))

    # {'embedding': [0.017020374536514282, -0.023255806416273117, ...],
    ```
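
    #### Serializing and restoring the component

    A minimal round-trip sketch: with the default token read from the `HF_API_TOKEN`/`HF_TOKEN`
    environment variables, the component can be serialized with `to_dict` and rebuilt with `from_dict`.

    ```python
    from haystack.components.embedders import HuggingFaceAPITextEmbedder

    text_embedder = HuggingFaceAPITextEmbedder(api_type="serverless_inference_api",
                                               api_params={"model": "BAAI/bge-small-en-v1.5"})

    data = text_embedder.to_dict()
    restored_embedder = HuggingFaceAPITextEmbedder.from_dict(data)
    ```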
    """
73

    def __init__(
        self,
        api_type: Union[HFEmbeddingAPIType, str],
        api_params: Dict[str, str],
        token: Optional[Secret] = Secret.from_env_var(["HF_API_TOKEN", "HF_TOKEN"], strict=False),
        prefix: str = "",
        suffix: str = "",
        truncate: Optional[bool] = True,
        normalize: Optional[bool] = False,
    ):  # pylint: disable=too-many-positional-arguments
        """
        Creates a HuggingFaceAPITextEmbedder component.

        :param api_type:
            The type of Hugging Face API to use.
        :param api_params:
            A dictionary with the following keys:
            - `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
            - `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
            `TEXT_EMBEDDINGS_INFERENCE`.
        :param token: The Hugging Face token to use as HTTP bearer authorization.
            Check your HF token in your [account settings](https://huggingface.co/settings/tokens).
        :param prefix:
            A string to add at the beginning of each text.
        :param suffix:
            A string to add at the end of each text.
        :param truncate:
            Truncates the input text to the maximum length supported by the model.
            Applicable when `api_type` is `TEXT_EMBEDDINGS_INFERENCE`, or `INFERENCE_ENDPOINTS`
            if the backend uses Text Embeddings Inference.
            If `api_type` is `SERVERLESS_INFERENCE_API`, this parameter is ignored.
        :param normalize:
            Normalizes the embeddings to unit length.
            Applicable when `api_type` is `TEXT_EMBEDDINGS_INFERENCE`, or `INFERENCE_ENDPOINTS`
            if the backend uses Text Embeddings Inference.
            If `api_type` is `SERVERLESS_INFERENCE_API`, this parameter is ignored.
        """
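        # Fail fast with an informative error if huggingface_hub is not installed.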
        huggingface_hub_import.check()

        if isinstance(api_type, str):
            api_type = HFEmbeddingAPIType.from_str(api_type)

        # Decide what to hand to InferenceClient: the model ID for the Serverless Inference API
        # (validated as an embedding model), or the endpoint URL for Inference Endpoints and
        # Text Embeddings Inference.
        if api_type == HFEmbeddingAPIType.SERVERLESS_INFERENCE_API:
            model = api_params.get("model")
            if model is None:
                raise ValueError(
                    "To use the Serverless Inference API, you need to specify the `model` parameter in `api_params`."
                )
            check_valid_model(model, HFModelType.EMBEDDING, token)
            model_or_url = model
        elif api_type in [HFEmbeddingAPIType.INFERENCE_ENDPOINTS, HFEmbeddingAPIType.TEXT_EMBEDDINGS_INFERENCE]:
            url = api_params.get("url")
            if url is None:
                msg = (
                    "To use Text Embeddings Inference or Inference Endpoints, you need to specify the `url` "
                    "parameter in `api_params`."
                )
                raise ValueError(msg)
            if not is_valid_http_url(url):
                raise ValueError(f"Invalid URL: {url}")
            model_or_url = url
        else:
            msg = f"Unknown api_type {api_type}"
            raise ValueError(msg)

        self.api_type = api_type
        self.api_params = api_params
        self.token = token
        self.prefix = prefix
        self.suffix = suffix
        self.truncate = truncate
        self.normalize = normalize
        self._client = InferenceClient(model_or_url, token=token.resolve_value() if token else None)

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            api_type=str(self.api_type),
            api_params=self.api_params,
            prefix=self.prefix,
            suffix=self.suffix,
            token=self.token.to_dict() if self.token else None,
            truncate=self.truncate,
            normalize=self.normalize,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceAPITextEmbedder":
        """
        Deserializes the component from a dictionary.

        :param data:
            Dictionary to deserialize from.
        :returns:
            Deserialized component.
        """
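        # Rebuild the `token` Secret from its serialized form before the generic deserialization.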
        deserialize_secrets_inplace(data["init_parameters"], keys=["token"])
        return default_from_dict(cls, data)

    @component.output_types(embedding=List[float])
    def run(self, text: str):
        """
        Embeds a single string.

        :param text:
            Text to embed.

        :returns:
            A dictionary with the following keys:
            - `embedding`: The embedding of the input text.
        """
        if not isinstance(text, str):
            raise TypeError(
                "HuggingFaceAPITextEmbedder expects a string as an input. "
                "In case you want to embed a list of Documents, please use the HuggingFaceAPIDocumentEmbedder."
            )

        truncate = self.truncate
        normalize = self.normalize

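        # The Serverless Inference API does not support `truncate` and `normalize`,
        # so warn and drop them before calling the client.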
        if self.api_type == HFEmbeddingAPIType.SERVERLESS_INFERENCE_API:
            if truncate is not None:
                msg = "`truncate` parameter is not supported for Serverless Inference API. It will be ignored."
                warnings.warn(msg)
                truncate = None
            if normalize is not None:
                msg = "`normalize` parameter is not supported for Serverless Inference API. It will be ignored."
                warnings.warn(msg)
                normalize = None

        text_to_embed = self.prefix + text + self.suffix

        np_embedding = self._client.feature_extraction(text=text_to_embed, truncate=truncate, normalize=normalize)

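        # The endpoint may return the embedding with shape (1, embedding_dim) or (embedding_dim,);
        # anything else is rejected, and the accepted result is flattened to a plain list of floats.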
        error_msg = f"Expected embedding shape (1, embedding_dim) or (embedding_dim,), got {np_embedding.shape}"
        if np_embedding.ndim > 2:
            raise ValueError(error_msg)
        if np_embedding.ndim == 2 and np_embedding.shape[0] != 1:
            raise ValueError(error_msg)

        embedding = np_embedding.flatten().tolist()

        return {"embedding": embedding}