• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 20962091174

13 Jan 2026 03:19PM UTC coverage: 92.179% (+0.007%) from 92.172%
20962091174

push

github

web-flow
Skip callables in component tool schema generation (#10362)

* Skip callables in component tool schema generation (e.g. snapshot_callback)

* Release note

* Fix formatting on release note

* Update test/tools/test_component_tool.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* Fix formatting

* Add same fix to create_tool_from_function

---------

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

14414 of 15637 relevant lines covered (92.18%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.62
haystack/tools/component_tool.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any, Callable, get_args, get_origin
1✔
6

7
from pydantic import Field, TypeAdapter, create_model
1✔
8

9
from haystack import logging
1✔
10
from haystack.core.component import Component
1✔
11
from haystack.core.serialization import (
1✔
12
    component_from_dict,
13
    component_to_dict,
14
    generate_qualified_class_name,
15
    import_class_by_name,
16
)
17
from haystack.tools import Tool
1✔
18
from haystack.tools.errors import SchemaGenerationError
1✔
19
from haystack.tools.from_function import _remove_title_from_schema
1✔
20
from haystack.tools.parameters_schema_utils import (
1✔
21
    _contains_callable_type,
22
    _get_component_param_descriptions,
23
    _resolve_type,
24
)
25
from haystack.tools.tool import _deserialize_outputs_to_state, _serialize_outputs_to_state
1✔
26
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
1✔
27

28
logger = logging.getLogger(__name__)
1✔
29

30

31
class ComponentTool(Tool):
1✔
32
    """
33
    A Tool that wraps Haystack components, allowing them to be used as tools by LLMs.
34

35
    ComponentTool automatically generates LLM-compatible tool schemas from component input sockets,
36
    which are derived from the component's `run` method signature and type hints.
37

38

39
    Key features:
40
    - Automatic LLM tool calling schema generation from component input sockets
41
    - Type conversion and validation for component inputs
42
    - Support for types:
43
        - Dataclasses
44
        - Lists of dataclasses
45
        - Basic types (str, int, float, bool, dict)
46
        - Lists of basic types
47
    - Automatic name generation from component class name
48
    - Description extraction from component docstrings
49

50
    To use ComponentTool, you first need a Haystack component - either an existing one or a new one you create.
51
    You can create a ComponentTool from the component by passing the component to the ComponentTool constructor.
52
    Below is an example of creating a ComponentTool from an existing SerperDevWebSearch component.
53

54
    ## Usage Example:
55

56
    ```python
57
    from haystack import component, Pipeline
58
    from haystack.tools import ComponentTool
59
    from haystack.components.websearch import SerperDevWebSearch
60
    from haystack.utils import Secret
61
    from haystack.components.tools.tool_invoker import ToolInvoker
62
    from haystack.components.generators.chat import OpenAIChatGenerator
63
    from haystack.dataclasses import ChatMessage
64

65
    # Create a SerperDev search component
66
    search = SerperDevWebSearch(api_key=Secret.from_env_var("SERPERDEV_API_KEY"), top_k=3)
67

68
    # Create a tool from the component
69
    tool = ComponentTool(
70
        component=search,
71
        name="web_search",  # Optional: defaults to "serper_dev_web_search"
72
        description="Search the web for current information on any topic"  # Optional: defaults to component docstring
73
    )
74

75
    # Create pipeline with OpenAIChatGenerator and ToolInvoker
76
    pipeline = Pipeline()
77
    pipeline.add_component("llm", OpenAIChatGenerator(tools=[tool]))
78
    pipeline.add_component("tool_invoker", ToolInvoker(tools=[tool]))
79

80
    # Connect components
81
    pipeline.connect("llm.replies", "tool_invoker.messages")
82

83
    message = ChatMessage.from_user("Use the web search tool to find information about Nikola Tesla")
84

85
    # Run pipeline
86
    result = pipeline.run({"llm": {"messages": [message]}})
87

88
    print(result)
89
    ```
90

91
    """
92

93
    def __init__(
        self,
        component: Component,
        name: str | None = None,
        description: str | None = None,
        parameters: dict[str, Any] | None = None,
        *,
        outputs_to_string: dict[str, str | Callable[[Any], str]] | None = None,
        inputs_from_state: dict[str, str] | None = None,
        outputs_to_state: dict[str, dict[str, str | Callable]] | None = None,
    ) -> None:
        """
        Wrap a Haystack component so that it can be called as a Tool.

        :param component: The Haystack component instance to expose as a tool.
        :param name:
            Name of the tool. When empty, the component class name converted to snake_case is used.
        :param description:
            Description of the tool. When empty, the component's docstring is used, and the tool name
            if the component has no docstring.
        :param parameters:
            A JSON schema describing the parameters the Tool expects.
            When not provided (or empty), the schema is generated from the component's input sockets.
        :param outputs_to_string:
            Optional dictionary describing how the tool outputs are turned into string(s).
            Two formats are supported:

            1. Single output format - "source" and/or "handler" at the root level:
                ```python
                {
                    "source": "docs", "handler": format_documents
                }
                ```
                With a source, only that output key is passed to the handler.
                Without a source, the whole tool result is passed to the handler.

            2. Multiple output format - each key maps to its own configuration:
                ```python
                {
                    "formatted_docs": {"source": "docs", "handler": format_documents},
                    "summary": {"source": "summary_text", "handler": str.upper}
                }
                ```
                Every configuration dictionary can contain "source" and/or "handler".
        :param inputs_from_state:
            Optional dictionary mapping state keys to tool parameter names.
            Example: `{"repository": "repo"}` maps state's "repository" to tool's "repo" parameter.
        :param outputs_to_state:
            Optional dictionary describing which tool outputs are written to which state keys, with
            optional handlers.
            With a source, only that output key is passed to the handler.
            Example:
            ```python
            {
                "documents": {"source": "docs", "handler": custom_handler}
            }
            ```
            Without a source, the whole tool result is passed to the handler.
            Example:
            ```python
            {
                "documents": {"handler": custom_handler}
            }
            ```
        :raises ValueError:
            If `component` is not a Haystack component instance or has already been added to a pipeline.
        :raises SchemaGenerationError:
            If the parameters schema has to be generated and generation fails.
        """
        if not isinstance(component, Component):
            not_a_component_msg = (
                f"Object {component!r} is not a Haystack component. "
                "Use ComponentTool only with Haystack component instances."
            )
            raise ValueError(not_a_component_msg)

        if getattr(component, "__haystack_added_to_pipeline__", None):
            in_pipeline_msg = (
                "Component has been added to a pipeline and can't be used to create a ComponentTool. "
                "Create ComponentTool from a non-pipeline component instead."
            )
            raise ValueError(in_pipeline_msg)

        # Keep the user-supplied value untouched: it is what gets written out on serialization
        self._unresolved_parameters = parameters

        # Use the explicit schema when given, otherwise derive it from the component's input sockets
        if parameters:
            tool_schema = parameters
        else:
            tool_schema = self._create_tool_parameters_schema(component, inputs_from_state or {})

        def component_invoker(**kwargs):
            """
            Run the wrapped component with the keyword arguments produced by the LLM tool call.

            :param kwargs: The keyword arguments to invoke the component with.
            :returns: The result of the component invocation.
            """
            converted_kwargs = {}
            sockets = component.__haystack_input__._sockets_dict  # type: ignore[attr-defined]
            for arg_name, raw_value in kwargs.items():
                declared_type = sockets[arg_name].type

                # For list[X] the element type X decides whether `from_dict` conversion applies
                if get_origin(declared_type) is list:
                    element_type = get_args(declared_type)[0]
                else:
                    element_type = declared_type

                if not hasattr(element_type, "from_dict"):
                    # TypeAdapter validates both single values and lists
                    converted = TypeAdapter(declared_type).validate_python(raw_value)
                elif isinstance(raw_value, list):
                    converted = [
                        element_type.from_dict(entry) if isinstance(entry, dict) else entry for entry in raw_value
                    ]
                elif isinstance(raw_value, dict):
                    converted = element_type.from_dict(raw_value)
                else:
                    # Neither a list nor a dict: pass the value through unchanged
                    converted = raw_value

                converted_kwargs[arg_name] = converted
            logger.debug(f"Invoking component {type(component)} with kwargs: {converted_kwargs}")
            return component.run(**converted_kwargs)

        # Derive a snake_case tool name from the component class name when none was given
        if not name:
            class_name = component.__class__.__name__
            pieces = []
            for idx, char in enumerate(class_name):
                # An underscore goes before an uppercase letter that follows a non-uppercase character
                starts_word = char.isupper() and idx > 0 and not class_name[idx - 1].isupper()
                pieces.append(("_" + char.lower()) if starts_word else char.lower())
            name = "".join(pieces).lstrip("_")

        description = description or component.__doc__ or name

        # The component must be stored before super().__init__() so _get_valid_outputs() can access it
        self._component = component
        self._is_warmed_up = False

        # Build the Tool with the invoker as its callable and the resolved schema as its parameters
        super().__init__(
            name=name,
            description=description,
            parameters=tool_schema,
            function=component_invoker,
            inputs_from_state=inputs_from_state,
            outputs_to_state=outputs_to_state,
            outputs_to_string=outputs_to_string,
        )
232

233
    def _get_valid_inputs(self) -> set[str]:
        """
        Collect the names of the wrapped component's input sockets.

        These names are the reference against which `inputs_from_state` is validated, so that
        users cannot point at inputs the component does not have.

        :returns: Set of component input socket names.
        """
        input_sockets = self._component.__haystack_input__._sockets_dict  # type: ignore[attr-defined]
        return set(input_sockets.keys())
1✔
243

244
    def _get_valid_outputs(self) -> set[str]:
        """
        Collect the names of the wrapped component's output sockets.

        These names are the reference against which `outputs_to_state` is validated, so that
        users cannot point at outputs the component does not produce.

        :returns: Set of component output socket names.
        """
        output_sockets = self._component.__haystack_output__._sockets_dict  # type: ignore[attr-defined]
        return set(output_sockets.keys())
1✔
254

255
    def warm_up(self):
        """
        Prepare the ComponentTool for use.

        Calls the wrapped component's `warm_up` method if it has one. Subsequent calls are no-ops.
        """
        if self._is_warmed_up:
            return

        # Not every component defines warm_up, so only call it when present
        if hasattr(self._component, "warm_up"):
            self._component.warm_up()
        self._is_warmed_up = True
1✔
263

264
    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the ComponentTool to a dictionary.

        :returns:
            Dictionary with the qualified class name under "type" and the serialized fields under "data".
        """
        component_data = component_to_dict(obj=self._component, name=self.name)

        state_outputs = None
        if self.outputs_to_state:
            state_outputs = _serialize_outputs_to_state(self.outputs_to_state)

        # NOTE(review): only a root-level "handler" is serialized. A configuration without a
        # root-level handler (source-only, or the multiple output format) is written as None.
        string_outputs = None
        if self.outputs_to_string is not None:
            handler = self.outputs_to_string.get("handler")
            if handler is not None:
                # Shallow copy so the tool's own attribute is not modified in place
                string_outputs = self.outputs_to_string.copy()
                string_outputs["handler"] = serialize_callable(handler)

        payload: dict[str, Any] = {
            "component": component_data,
            "name": self.name,
            "description": self.description,
            # The user-supplied schema (possibly None), not the generated one
            "parameters": self._unresolved_parameters,
            "inputs_from_state": self.inputs_from_state,
            "outputs_to_state": state_outputs,
            "outputs_to_string": string_outputs,
        }
        return {"type": generate_qualified_class_name(type(self)), "data": payload}
1✔
285

286
    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ComponentTool":
        """
        Deserializes the ComponentTool from a dictionary.

        The deserialized handlers are kept in local values; this method does not write them back
        into `data`, so the serialized dictionary still holds its serialized handlers afterwards.

        :param data: Dictionary in the format produced by `to_dict`, with the fields under "data".
        :returns: The deserialized ComponentTool.
        """
        inner_data = data["data"]
        component_class = import_class_by_name(inner_data["component"]["type"])
        component = component_from_dict(cls=component_class, data=inner_data["component"], name=inner_data["name"])

        # Work on local values instead of assigning back into `inner_data`
        outputs_to_state = inner_data.get("outputs_to_state", None)
        if outputs_to_state:
            outputs_to_state = _deserialize_outputs_to_state(outputs_to_state)

        outputs_to_string = inner_data.get("outputs_to_string", None)
        if outputs_to_string is not None and outputs_to_string.get("handler") is not None:
            # Copy before replacing the handler so the caller's nested dictionary stays serialized
            outputs_to_string = {
                **outputs_to_string,
                "handler": deserialize_callable(outputs_to_string["handler"]),
            }

        return cls(
            component=component,
            name=inner_data["name"],
            description=inner_data["description"],
            parameters=inner_data.get("parameters", None),
            outputs_to_string=outputs_to_string,
            inputs_from_state=inner_data.get("inputs_from_state", None),
            outputs_to_state=outputs_to_state,
        )
315

316
    def _create_tool_parameters_schema(self, component: Component, inputs_from_state: dict[str, Any]) -> dict[str, Any]:
        """
        Creates an OpenAI tools schema from a component's run method parameters.

        The schema is built from the component's input sockets. Inputs that are filled from state
        and inputs whose type contains a Callable are left out of the schema.

        :param component: The component to create the schema from.
        :param inputs_from_state:
            Mapping of state keys to tool parameter names. Every input named as a value of this
            mapping is excluded from the generated schema.
        :raises SchemaGenerationError: If schema generation fails
        :returns: OpenAI tools schema for the component's run method parameters.
        """
        component_run_description, param_descriptions = _get_component_param_descriptions(component)

        # Inputs supplied from state are not exposed to the LLM; compute them once, outside the loop
        state_fed_inputs = tuple(inputs_from_state.values()) if inputs_from_state is not None else ()

        # collect fields (types and defaults) and descriptions from the component's input sockets
        fields: dict[str, Any] = {}

        for input_name, socket in component.__haystack_input__._sockets_dict.items():  # type: ignore[attr-defined]
            if input_name in state_fed_inputs:
                continue
            input_type = socket.type

            # Skip Callable types since Pydantic cannot generate JSON schemas for them
            if _contains_callable_type(input_type):
                continue

            description = param_descriptions.get(input_name, f"Input '{input_name}' for the component.")

            # if the parameter has not a default value, Pydantic requires an Ellipsis (...)
            # to explicitly indicate that the parameter is required
            default = ... if socket.is_mandatory else socket.default_value
            resolved_type = _resolve_type(input_type)
            fields[input_name] = (resolved_type, Field(default=default, description=description))

        try:
            model = create_model(component.run.__name__, __doc__=component_run_description, **fields)
            parameters_schema: dict[str, Any] = model.model_json_schema()
        except Exception as e:
            # Any failure while building the model or its schema is reported as one error type
            raise SchemaGenerationError(
                f"Failed to create JSON schema for the run method of Component '{component.__class__.__name__}'"
            ) from e

        # we don't want to include title keywords in the schema, as they contain redundant information
        # there is no programmatic way to prevent Pydantic from adding them, so we remove them later
        # see https://github.com/pydantic/pydantic/discussions/8504
        _remove_title_from_schema(parameters_schema)

        return parameters_schema
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc