• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 18647136217

20 Oct 2025 08:52AM UTC coverage: 92.179% (-0.04%) from 92.22%
18647136217

Pull #9856

github

web-flow
Merge dc9eda57a into 1de94413c
Pull Request #9856: Add Tools warm_up

13425 of 14564 relevant lines covered (92.18%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.25
haystack/tools/component_tool.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any, Callable, Optional, Union, get_args, get_origin
1✔
6

7
from pydantic import Field, TypeAdapter, create_model
1✔
8

9
from haystack import logging
1✔
10
from haystack.core.component import Component
1✔
11
from haystack.core.serialization import (
1✔
12
    component_from_dict,
13
    component_to_dict,
14
    generate_qualified_class_name,
15
    import_class_by_name,
16
)
17
from haystack.tools import Tool
1✔
18
from haystack.tools.errors import SchemaGenerationError
1✔
19
from haystack.tools.from_function import _remove_title_from_schema
1✔
20
from haystack.tools.parameters_schema_utils import _get_component_param_descriptions, _resolve_type
1✔
21
from haystack.tools.tool import _deserialize_outputs_to_state, _serialize_outputs_to_state
1✔
22
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
1✔
23

24
logger = logging.getLogger(__name__)
1✔
25

26

27
class ComponentTool(Tool):
1✔
28
    """
29
    A Tool that wraps Haystack components, allowing them to be used as tools by LLMs.
30

31
    ComponentTool automatically generates LLM-compatible tool schemas from component input sockets,
32
    which are derived from the component's `run` method signature and type hints.
33

34

35
    Key features:
36
    - Automatic LLM tool calling schema generation from component input sockets
37
    - Type conversion and validation for component inputs
38
    - Support for types:
39
        - Dataclasses
40
        - Lists of dataclasses
41
        - Basic types (str, int, float, bool, dict)
42
        - Lists of basic types
43
    - Automatic name generation from component class name
44
    - Description extraction from component docstrings
45

46
    To use ComponentTool, you first need a Haystack component - either an existing one or a new one you create.
47
    You can create a ComponentTool from the component by passing the component to the ComponentTool constructor.
48
    Below is an example of creating a ComponentTool from an existing SerperDevWebSearch component.
49

50
    ## Usage Example:
51

52
    ```python
53
    from haystack import component, Pipeline
54
    from haystack.tools import ComponentTool
55
    from haystack.components.websearch import SerperDevWebSearch
56
    from haystack.utils import Secret
57
    from haystack.components.tools.tool_invoker import ToolInvoker
58
    from haystack.components.generators.chat import OpenAIChatGenerator
59
    from haystack.dataclasses import ChatMessage
60

61
    # Create a SerperDev search component
62
    search = SerperDevWebSearch(api_key=Secret.from_env_var("SERPERDEV_API_KEY"), top_k=3)
63

64
    # Create a tool from the component
65
    tool = ComponentTool(
66
        component=search,
67
        name="web_search",  # Optional: defaults to "serper_dev_web_search"
68
        description="Search the web for current information on any topic"  # Optional: defaults to component docstring
69
    )
70

71
    # Create pipeline with OpenAIChatGenerator and ToolInvoker
72
    pipeline = Pipeline()
73
    pipeline.add_component("llm", OpenAIChatGenerator(model="gpt-4o-mini", tools=[tool]))
74
    pipeline.add_component("tool_invoker", ToolInvoker(tools=[tool]))
75

76
    # Connect components
77
    pipeline.connect("llm.replies", "tool_invoker.messages")
78

79
    message = ChatMessage.from_user("Use the web search tool to find information about Nikola Tesla")
80

81
    # Run pipeline
82
    result = pipeline.run({"llm": {"messages": [message]}})
83

84
    print(result)
85
    ```
86

87
    """
88

89
    def __init__(
        self,
        component: Component,
        name: Optional[str] = None,
        description: Optional[str] = None,
        parameters: Optional[dict[str, Any]] = None,
        *,
        outputs_to_string: Optional[dict[str, Union[str, Callable[[Any], str]]]] = None,
        inputs_from_state: Optional[dict[str, str]] = None,
        outputs_to_state: Optional[dict[str, dict[str, Union[str, Callable]]]] = None,
    ) -> None:
        """
        Wrap a Haystack component so that it can be called as a Tool.

        :param component:
            The Haystack component instance to expose as a tool. It must not have been added to a pipeline.
        :param name:
            Name of the tool. When empty, the component's class name converted to snake_case is used.
        :param description:
            Description of the tool. When empty, the component's docstring is used, and the tool name
            if the component has no docstring.
        :param parameters:
            A JSON schema defining the parameters expected by the Tool.
            When empty, the schema is generated from the component's input sockets.
        :param outputs_to_string:
            Optional dictionary describing how the tool output is turned into a string.
            With a source, only that output key is handed to the handler;
            without a source, the handler receives the whole tool result.
            Example:
            ```python
            {
                "source": "docs", "handler": format_documents
            }
            ```
        :param inputs_from_state:
            Optional dictionary mapping state keys to tool parameter names.
            Example: `{"repository": "repo"}` maps state's "repository" to tool's "repo" parameter.
            Parameters listed here are left out of the generated schema.
        :param outputs_to_state:
            Optional dictionary mapping state keys to a tool output and an optional handler.
            With a source, only that output key is handed to the handler.
            Example:
            ```python
            {
                "documents": {"source": "docs", "handler": custom_handler}
            }
            ```
            Without a source, the handler receives the whole tool result.
            Example:
            ```python
            {
                "documents": {"handler": custom_handler}
            }
            ```
        :raises ValueError:
            If `component` is not a Haystack component instance or has already been added to a pipeline.
        :raises SchemaGenerationError:
            If no `parameters` are given and the schema cannot be generated from the component.
        """
        if not isinstance(component, Component):
            message = (
                f"Object {component!r} is not a Haystack component. "
                "Use ComponentTool only with Haystack component instances."
            )
            raise ValueError(message)

        if getattr(component, "__haystack_added_to_pipeline__", None):
            msg = (
                "Component has been added to a pipeline and can't be used to create a ComponentTool. "
                "Create ComponentTool from a non-pipeline component instead."
            )
            raise ValueError(msg)

        # Remember what the caller passed (possibly None) so that to_dict() does not persist a generated schema.
        self._unresolved_parameters = parameters

        # A user-supplied schema wins; otherwise derive one from the component's input sockets.
        if parameters:
            tool_schema = parameters
        else:
            tool_schema = self._create_tool_parameters_schema(component, inputs_from_state or {})

        if not name:
            # Convert camelCase/PascalCase to snake_case: an underscore goes in front of every
            # uppercase letter that follows a non-uppercase character.
            class_name = component.__class__.__name__
            pieces = []
            previous = ""
            for char in class_name:
                starts_new_word = char.isupper() and previous != "" and not previous.isupper()
                pieces.append("_" + char.lower() if starts_new_word else char.lower())
                previous = char
            name = "".join(pieces).lstrip("_")

        description = description or component.__doc__ or name

        def component_invoker(**kwargs):
            """
            Run the wrapped component with the keyword arguments produced by the LLM tool call.

            :param kwargs: The keyword arguments to invoke the component with.
            :returns: The result of the component invocation.
            """
            sockets = component.__haystack_input__._sockets_dict  # type: ignore[attr-defined]
            call_args = {}
            for arg_name, raw_value in kwargs.items():
                declared_type = sockets[arg_name].type

                # For list[X] the conversion is decided by the element type X.
                if get_origin(declared_type) is list:
                    item_type = get_args(declared_type)[0]
                else:
                    item_type = declared_type

                if not hasattr(item_type, "from_dict"):
                    # Let TypeAdapter handle both single values and lists
                    call_args[arg_name] = TypeAdapter(declared_type).validate_python(raw_value)
                    continue

                # Types offering from_dict are rebuilt from dictionaries; anything else passes through untouched.
                if isinstance(raw_value, list):
                    call_args[arg_name] = [
                        item_type.from_dict(entry) if isinstance(entry, dict) else entry for entry in raw_value
                    ]
                elif isinstance(raw_value, dict):
                    call_args[arg_name] = item_type.from_dict(raw_value)
                else:
                    call_args[arg_name] = raw_value

            logger.debug(f"Invoking component {type(component)} with kwargs: {call_args}")
            return component.run(**call_args)

        # Create the Tool instance with the component invoker as the function to be called and the schema
        super().__init__(
            name=name,
            description=description,
            parameters=tool_schema,
            function=component_invoker,
            inputs_from_state=inputs_from_state,
            outputs_to_state=outputs_to_state,
            outputs_to_string=outputs_to_string,
        )
        self._component = component
        self._is_warmed_up = False
1✔
215

216
    def warm_up(self):
        """
        Prepare the ComponentTool for use.

        Calls the wrapped component's `warm_up` method if it has one. Once a call has completed,
        later calls return immediately without touching the component.
        """
        if self._is_warmed_up:
            return

        if hasattr(self._component, "warm_up"):
            self._component.warm_up()
        self._is_warmed_up = True
1✔
224

225
    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the ComponentTool to a dictionary.

        The `parameters` entry holds the schema passed to the constructor (possibly `None`),
        not a schema generated from the component.

        :returns:
            A dictionary with the qualified class name under "type" and the serialized fields under "data".
        """
        serialized: dict[str, Any] = {
            "component": component_to_dict(obj=self._component, name=self.name),
            "name": self.name,
            "description": self.description,
            "parameters": self._unresolved_parameters,
            "inputs_from_state": self.inputs_from_state,
            "outputs_to_state": _serialize_outputs_to_state(self.outputs_to_state) if self.outputs_to_state else None,
        }

        if self.outputs_to_string:
            # Work on a shallow copy so the tool's own attribute keeps its callable handler.
            outputs_to_string = self.outputs_to_string.copy()
            # A configuration may carry only a "source"; keep it instead of dropping the whole entry,
            # and serialize the handler only when there is one.
            if outputs_to_string.get("handler") is not None:
                outputs_to_string["handler"] = serialize_callable(outputs_to_string["handler"])
            serialized["outputs_to_string"] = outputs_to_string
        else:
            serialized["outputs_to_string"] = None

        return {"type": generate_qualified_class_name(type(self)), "data": serialized}
1✔
246

247
    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ComponentTool":
        """
        Deserializes the ComponentTool from a dictionary.

        The handler replacements made here are applied to copies, so `data` keeps its serialized
        handler references and can be deserialized again.

        :param data:
            Dictionary with the serialized tool fields under the "data" key, as produced by `to_dict`.
        :returns:
            The deserialized ComponentTool.
        """
        # Shallow copy: entries are replaced below and must not leak back into the caller's dictionary.
        inner_data = dict(data["data"])
        component_class = import_class_by_name(inner_data["component"]["type"])
        # NOTE(review): whether component_from_dict modifies the nested "component" dictionary is not
        # visible from here; it is passed through unchanged, exactly as before.
        component = component_from_dict(cls=component_class, data=inner_data["component"], name=inner_data["name"])

        if inner_data.get("outputs_to_state"):
            inner_data["outputs_to_state"] = _deserialize_outputs_to_state(inner_data["outputs_to_state"])

        outputs_to_string = inner_data.get("outputs_to_string")
        if outputs_to_string is not None and outputs_to_string.get("handler") is not None:
            # Build a new dictionary rather than overwriting the serialized handler in the input.
            outputs_to_string = {
                **outputs_to_string,
                "handler": deserialize_callable(outputs_to_string["handler"]),
            }

        return cls(
            component=component,
            name=inner_data["name"],
            description=inner_data["description"],
            parameters=inner_data.get("parameters", None),
            outputs_to_string=outputs_to_string,
            inputs_from_state=inner_data.get("inputs_from_state", None),
            outputs_to_state=inner_data.get("outputs_to_state", None),
        )
276

277
    def _create_tool_parameters_schema(self, component: Component, inputs_from_state: dict[str, Any]) -> dict[str, Any]:
        """
        Builds the tool parameters schema from the input sockets of a component.

        Each input socket becomes one field of a Pydantic model whose JSON schema is returned.
        Sockets whose name appears among the values of `inputs_from_state` are skipped.

        :param component: The component to create the schema from.
        :param inputs_from_state:
            Mapping of state keys to parameter names; the named parameters are left out of the schema.
        :raises SchemaGenerationError: If the model or its JSON schema cannot be created.
        :returns: JSON schema for the component's run method parameters, with `title` keywords removed.
        """
        component_run_description, param_descriptions = _get_component_param_descriptions(component)

        # Names of the parameters that come from state; computed once instead of once per socket.
        state_fed_params = [] if inputs_from_state is None else list(inputs_from_state.values())

        # One (type, Field) pair per exposed input socket.
        fields: dict[str, Any] = {}
        for input_name, socket in component.__haystack_input__._sockets_dict.items():  # type: ignore[attr-defined]
            if input_name in state_fed_params:
                continue

            # Pydantic needs an Ellipsis (...) as default to mark a field as required.
            if socket.is_mandatory:
                default = ...
            else:
                default = socket.default_value

            field_info = Field(
                default=default,
                description=param_descriptions.get(input_name, f"Input '{input_name}' for the component."),
            )
            fields[input_name] = (_resolve_type(socket.type), field_info)

        try:
            model = create_model(component.run.__name__, __doc__=component_run_description, **fields)
            parameters_schema: dict[str, Any] = model.model_json_schema()
        except Exception as e:
            raise SchemaGenerationError(
                f"Failed to create JSON schema for the run method of Component '{component.__class__.__name__}'"
            ) from e

        # Pydantic always emits redundant `title` keywords and offers no switch to turn them off,
        # so they are stripped afterwards.
        # see https://github.com/pydantic/pydantic/discussions/8504
        _remove_title_from_schema(parameters_schema)

        return parameters_schema
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc