• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 20718306937

05 Jan 2026 02:21PM UTC coverage: 92.158% (+0.01%) from 92.146%
20718306937

push

github

web-flow
chore: Add noqa for post_init PLR0912 and C901 warnings (#10301)

* Add noqa for post_init PLR0912 and C901 failures

* pylint

14373 of 15596 relevant lines covered (92.16%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.51
haystack/tools/component_tool.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from typing import Any, Callable, get_args, get_origin
1✔
6

7
from pydantic import Field, TypeAdapter, create_model
1✔
8

9
from haystack import logging
1✔
10
from haystack.core.component import Component
1✔
11
from haystack.core.serialization import (
1✔
12
    component_from_dict,
13
    component_to_dict,
14
    generate_qualified_class_name,
15
    import_class_by_name,
16
)
17
from haystack.tools import Tool
1✔
18
from haystack.tools.errors import SchemaGenerationError
1✔
19
from haystack.tools.from_function import _remove_title_from_schema
1✔
20
from haystack.tools.parameters_schema_utils import _get_component_param_descriptions, _resolve_type
1✔
21
from haystack.tools.tool import _deserialize_outputs_to_state, _serialize_outputs_to_state
1✔
22
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
1✔
23

24
# Module-level logger named after this module, obtained through Haystack's own `logging` module.
logger = logging.getLogger(__name__)
1✔
25

26

27
class ComponentTool(Tool):
1✔
28
    """
29
    A Tool that wraps Haystack components, allowing them to be used as tools by LLMs.
30

31
    ComponentTool automatically generates LLM-compatible tool schemas from component input sockets,
32
    which are derived from the component's `run` method signature and type hints.
33

34

35
    Key features:
36
    - Automatic LLM tool calling schema generation from component input sockets
37
    - Type conversion and validation for component inputs
38
    - Support for types:
39
        - Dataclasses
40
        - Lists of dataclasses
41
        - Basic types (str, int, float, bool, dict)
42
        - Lists of basic types
43
    - Automatic name generation from component class name
44
    - Description extraction from component docstrings
45

46
    To use ComponentTool, you first need a Haystack component - either an existing one or a new one you create.
47
    You can create a ComponentTool from the component by passing the component to the ComponentTool constructor.
48
    Below is an example of creating a ComponentTool from an existing SerperDevWebSearch component.
49

50
    ## Usage Example:
51

52
    ```python
53
    from haystack import component, Pipeline
54
    from haystack.tools import ComponentTool
55
    from haystack.components.websearch import SerperDevWebSearch
56
    from haystack.utils import Secret
57
    from haystack.components.tools.tool_invoker import ToolInvoker
58
    from haystack.components.generators.chat import OpenAIChatGenerator
59
    from haystack.dataclasses import ChatMessage
60

61
    # Create a SerperDev search component
62
    search = SerperDevWebSearch(api_key=Secret.from_env_var("SERPERDEV_API_KEY"), top_k=3)
63

64
    # Create a tool from the component
65
    tool = ComponentTool(
66
        component=search,
67
        name="web_search",  # Optional: defaults to "serper_dev_web_search"
68
        description="Search the web for current information on any topic"  # Optional: defaults to component docstring
69
    )
70

71
    # Create pipeline with OpenAIChatGenerator and ToolInvoker
72
    pipeline = Pipeline()
73
    pipeline.add_component("llm", OpenAIChatGenerator(tools=[tool]))
74
    pipeline.add_component("tool_invoker", ToolInvoker(tools=[tool]))
75

76
    # Connect components
77
    pipeline.connect("llm.replies", "tool_invoker.messages")
78

79
    message = ChatMessage.from_user("Use the web search tool to find information about Nikola Tesla")
80

81
    # Run pipeline
82
    result = pipeline.run({"llm": {"messages": [message]}})
83

84
    print(result)
85
    ```
86

87
    """
88

89
    def __init__(
        self,
        component: Component,
        name: str | None = None,
        description: str | None = None,
        parameters: dict[str, Any] | None = None,
        *,
        outputs_to_string: dict[str, str | Callable[[Any], str]] | None = None,
        inputs_from_state: dict[str, str] | None = None,
        outputs_to_state: dict[str, dict[str, str | Callable]] | None = None,
    ) -> None:
        """
        Wrap a Haystack component so that it can be invoked as a Tool.

        :param component: The Haystack component instance to expose as a tool.
        :param name:
            Name of the tool. When omitted, it is derived from the component class name converted to snake_case.
        :param description:
            Description of the tool. When omitted, the component's docstring is used; if the component has no
            docstring, the tool name is used instead.
        :param parameters:
            A JSON schema describing the parameters the Tool expects.
            When omitted, the schema is generated from the component's input sockets.
        :param outputs_to_string:
            Optional dictionary describing how the tool outputs are turned into string(s).
            Two formats are supported:

            1. Single output format - "source" and/or "handler" at the root level:
                ```python
                {
                    "source": "docs", "handler": format_documents
                }
                ```
                With a source, only that output key is passed to the handler.
                Without a source, the whole tool result is passed to the handler.

            2. Multiple output format - keys mapped to individual configurations:
                ```python
                {
                    "formatted_docs": {"source": "docs", "handler": format_documents},
                    "summary": {"source": "summary_text", "handler": str.upper}
                }
                ```
                Every key maps to a dictionary that can contain "source" and/or "handler".
        :param inputs_from_state:
            Optional dictionary mapping state keys to tool parameter names.
            Example: `{"repository": "repo"}` maps state's "repository" to tool's "repo" parameter.
            Parameters listed here as values are left out of the generated parameters schema.
        :param outputs_to_state:
            Optional dictionary describing how tool outputs map to keys within state, with optional handlers.
            With a source, only that output key is passed to the handler.
            Example:
            ```python
            {
                "documents": {"source": "docs", "handler": custom_handler}
            }
            ```
            Without a source, the whole tool result is passed to the handler.
            Example:
            ```python
            {
                "documents": {"handler": custom_handler}
            }
            ```
        :raises ValueError:
            If `component` is not a Haystack component instance or has already been added to a pipeline.
        :raises SchemaGenerationError:
            If no `parameters` are given and generating the schema from the component fails.
        """
        if not isinstance(component, Component):
            raise ValueError(
                f"Object {component!r} is not a Haystack component. "
                "Use ComponentTool only with Haystack component instances."
            )

        if getattr(component, "__haystack_added_to_pipeline__", None):
            raise ValueError(
                "Component has been added to a pipeline and can't be used to create a ComponentTool. "
                "Create ComponentTool from a non-pipeline component instead."
            )

        # Remember exactly what the caller passed, so that serialization does not emit a generated schema
        self._unresolved_parameters = parameters

        # An explicit (non-empty) schema wins; otherwise derive it from the component's input sockets
        if parameters:
            schema = parameters
        else:
            schema = self._create_tool_parameters_schema(component, inputs_from_state or {})

        def component_invoker(**kwargs):
            """
            Runs the wrapped component with the keyword arguments coming from the LLM tool call.

            :param kwargs: The keyword arguments to invoke the component with.
            :returns: The result of the component invocation.
            """
            sockets = component.__haystack_input__._sockets_dict  # type: ignore[attr-defined]
            call_args = {}
            for arg_name, raw_value in kwargs.items():
                declared_type = sockets[arg_name].type

                # For list[X] the element type X decides whether `from_dict` conversion applies
                if get_origin(declared_type) is list:
                    element_type = get_args(declared_type)[0]
                else:
                    element_type = declared_type

                if not hasattr(element_type, "from_dict"):
                    # No `from_dict` available: pydantic validates/coerces single values and lists alike
                    call_args[arg_name] = TypeAdapter(declared_type).validate_python(raw_value)
                    continue

                if isinstance(raw_value, list):
                    # Only dict items are converted; anything else is passed through unchanged
                    call_args[arg_name] = [
                        element_type.from_dict(entry) if isinstance(entry, dict) else entry for entry in raw_value
                    ]
                elif isinstance(raw_value, dict):
                    call_args[arg_name] = element_type.from_dict(raw_value)
                else:
                    call_args[arg_name] = raw_value

            logger.debug(f"Invoking component {type(component)} with kwargs: {call_args}")
            return component.run(**call_args)

        if not name:
            # Derive a snake_case name from the camelCase/PascalCase class name: an underscore is inserted
            # before an uppercase letter only when the preceding character is not uppercase itself.
            class_name = component.__class__.__name__
            pieces = []
            for index, char in enumerate(class_name):
                starts_word = char.isupper() and index > 0 and not class_name[index - 1].isupper()
                pieces.append(("_" if starts_word else "") + char.lower())
            name = "".join(pieces).lstrip("_")

        if not description:
            description = component.__doc__ or name

        # Store component before calling super().__init__() so _get_valid_outputs() can access it
        self._component = component
        self._is_warmed_up = False

        # Build the Tool with the invoker as the callable and the resolved schema as its parameters
        super().__init__(
            name=name,
            description=description,
            parameters=schema,
            function=component_invoker,
            inputs_from_state=inputs_from_state,
            outputs_to_state=outputs_to_state,
            outputs_to_string=outputs_to_string,
        )
228

229
    def _get_valid_inputs(self) -> set[str]:
1✔
230
        """
231
        Return valid input parameter names from the component's input sockets.
232

233
        Used to validate `inputs_from_state` against the component's actual inputs.
234
        This ensures users don't reference non-existent component inputs.
235

236
        :returns: Set of component input socket names.
237
        """
238
        return set(self._component.__haystack_input__._sockets_dict.keys())  # type: ignore[attr-defined]
1✔
239

240
    def _get_valid_outputs(self) -> set[str]:
1✔
241
        """
242
        Return valid output names from the component's output sockets.
243

244
        Used to validate `outputs_to_state` against the component's actual outputs.
245
        This ensures users don't reference non-existent component outputs.
246

247
        :returns: Set of component output socket names.
248
        """
249
        return set(self._component.__haystack_output__._sockets_dict.keys())  # type: ignore[attr-defined]
1✔
250

251
    def warm_up(self):
1✔
252
        """
253
        Prepare the ComponentTool for use.
254
        """
255
        if not self._is_warmed_up:
1✔
256
            if hasattr(self._component, "warm_up"):
1✔
257
                self._component.warm_up()
1✔
258
            self._is_warmed_up = True
1✔
259

260
    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the ComponentTool to a dictionary.

        The `parameters` entry holds the schema exactly as it was passed to the constructor (`None` when the
        schema was generated from the component). `outputs_to_string` is kept in both supported formats
        (root-level "source"/"handler", or keys mapped to nested configurations); every handler callable is
        replaced by its serialized form.

        :returns: Dictionary with the qualified class name under "type" and the serialized fields under "data".
        """
        serialized: dict[str, Any] = {
            "component": component_to_dict(obj=self._component, name=self.name),
            "name": self.name,
            "description": self.description,
            "parameters": self._unresolved_parameters,
            "inputs_from_state": self.inputs_from_state,
            "outputs_to_state": _serialize_outputs_to_state(self.outputs_to_state) if self.outputs_to_state else None,
        }

        if self.outputs_to_string:
            # The helper returns copies, so the tool's own configuration is never modified in place.
            # Configurations without a root-level handler (source only, or the multiple output format)
            # are preserved instead of being dropped.
            serialized["outputs_to_string"] = self._map_outputs_to_string_handlers(
                self.outputs_to_string, serialize_callable
            )
        else:
            serialized["outputs_to_string"] = None

        return {"type": generate_qualified_class_name(type(self)), "data": serialized}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ComponentTool":
        """
        Deserializes the ComponentTool from a dictionary.

        The deserialized `outputs_to_state` and `outputs_to_string` values are kept in local variables, so this
        method does not reassign entries of the passed-in `data`.

        :param data: Dictionary in the layout produced by `to_dict` ("type" and "data" keys).
        :returns: The deserialized ComponentTool.
        """
        inner_data = data["data"]
        component_class = import_class_by_name(inner_data["component"]["type"])
        component = component_from_dict(cls=component_class, data=inner_data["component"], name=inner_data["name"])

        outputs_to_state = inner_data.get("outputs_to_state")
        if outputs_to_state:
            outputs_to_state = _deserialize_outputs_to_state(outputs_to_state)

        outputs_to_string = inner_data.get("outputs_to_string")
        if outputs_to_string is not None:
            # Restores handlers both at the root level and inside nested per-key configurations
            outputs_to_string = cls._map_outputs_to_string_handlers(outputs_to_string, deserialize_callable)

        return cls(
            component=component,
            name=inner_data["name"],
            description=inner_data["description"],
            parameters=inner_data.get("parameters"),
            outputs_to_string=outputs_to_string,
            inputs_from_state=inner_data.get("inputs_from_state"),
            outputs_to_state=outputs_to_state,
        )

    @staticmethod
    def _map_outputs_to_string_handlers(
        outputs_to_string: dict[str, Any], convert: Callable[[Any], Any]
    ) -> dict[str, Any]:
        """
        Return a copy of an `outputs_to_string` configuration with `convert` applied to every handler.

        Handles the single output format ("handler" at the root level) as well as the multiple output format
        (keys mapped to dictionaries that may contain "handler"). The input dictionary and its nested
        dictionaries are not modified.

        :param outputs_to_string: The configuration to convert.
        :param convert: Function applied to each handler that is not None.
        :returns: A new dictionary with converted handlers and all other entries carried over unchanged.
        """
        converted: dict[str, Any] = {}
        for key, value in outputs_to_string.items():
            if isinstance(value, dict):
                # Multiple output format: the value is a nested {"source": ..., "handler": ...} configuration
                nested = dict(value)
                if nested.get("handler") is not None:
                    nested["handler"] = convert(nested["handler"])
                converted[key] = nested
            elif key == "handler" and value is not None:
                # Single output format: handler at the root level
                converted[key] = convert(value)
            else:
                converted[key] = value
        return converted
311

312
    def _create_tool_parameters_schema(self, component: Component, inputs_from_state: dict[str, Any]) -> dict[str, Any]:
        """
        Build the tool parameters JSON schema from the input sockets of a component.

        A pydantic model is created with one field per input socket and its JSON schema is returned with the
        title keywords removed.

        :param component: The component to create the schema from.
        :param inputs_from_state:
            Mapping whose values name the inputs that are provided from state; those inputs are left out
            of the schema.
        :raises SchemaGenerationError: If schema generation fails
        :returns: OpenAI tools schema for the component's run method parameters.
        """
        run_description, descriptions_by_param = _get_component_param_descriptions(component)

        # One (type, Field) pair per input socket, keyed by socket name
        model_fields: dict[str, Any] = {}
        sockets = component.__haystack_input__._sockets_dict  # type: ignore[attr-defined]
        for socket_name, input_socket in sockets.items():
            # Inputs that are filled from state must not be exposed as tool parameters
            if inputs_from_state is not None and socket_name in inputs_from_state.values():
                continue

            socket_type = input_socket.type
            field_description = descriptions_by_param.get(socket_name, f"Input '{socket_name}' for the component.")

            # Pydantic uses an Ellipsis (...) as default to mark a field as required
            if input_socket.is_mandatory:
                field_default = ...
            else:
                field_default = input_socket.default_value

            field_type = _resolve_type(socket_type)
            model_fields[socket_name] = (field_type, Field(default=field_default, description=field_description))

        try:
            run_model = create_model(component.run.__name__, __doc__=run_description, **model_fields)
            schema: dict[str, Any] = run_model.model_json_schema()
        except Exception as error:
            raise SchemaGenerationError(
                f"Failed to create JSON schema for the run method of Component '{component.__class__.__name__}'"
            ) from error

        # Title keywords only repeat information that is already in the schema. Pydantic offers no switch to
        # leave them out, so they are stripped afterwards.
        # see https://github.com/pydantic/pydantic/discussions/8504
        _remove_title_from_schema(schema)

        return schema
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc