• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 14858889103

06 May 2025 11:49AM UTC coverage: 90.415% (+0.005%) from 90.41%
14858889103

push

github

web-flow
fix: Update deepcopying in Pipeline to have a fallback in case of error (#9346)

* First pass at fix for deepcopying inputs and outputs

* Add reno

* Add recursion for dict objects

* Bump recursion depth

* More tests and some improvments

* Fix unit tests

* PR comments

10914 of 12071 relevant lines covered (90.42%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.2
haystack/tools/component_tool.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
from dataclasses import fields, is_dataclass
1✔
6
from inspect import getdoc
1✔
7
from typing import Any, Callable, Dict, Optional, Union, get_args, get_origin
1✔
8

9
from pydantic import TypeAdapter
1✔
10

11
from haystack import logging
1✔
12
from haystack.core.component import Component
1✔
13
from haystack.core.serialization import (
1✔
14
    component_from_dict,
15
    component_to_dict,
16
    generate_qualified_class_name,
17
    import_class_by_name,
18
)
19
from haystack.lazy_imports import LazyImport
1✔
20
from haystack.tools import Tool
1✔
21
from haystack.tools.errors import SchemaGenerationError
1✔
22
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
1✔
23

24
# docstring-parser is an optional dependency: it is only needed when
# ComponentTool extracts per-parameter descriptions from a component's
# run() docstring, so the import is deferred via LazyImport and checked
# (docstring_parser_import.check()) at first use.
with LazyImport(message="Run 'pip install docstring-parser'") as docstring_parser_import:
    from docstring_parser import parse


# Module-level logger, named after this module per haystack convention.
logger = logging.getLogger(__name__)
29

30

31
class ComponentTool(Tool):
    """
    A Tool that wraps Haystack components, allowing them to be used as tools by LLMs.

    ComponentTool automatically generates LLM-compatible tool schemas from component input sockets,
    which are derived from the component's `run` method signature and type hints.


    Key features:
    - Automatic LLM tool calling schema generation from component input sockets
    - Type conversion and validation for component inputs
    - Support for types:
        - Dataclasses
        - Lists of dataclasses
        - Basic types (str, int, float, bool, dict)
        - Lists of basic types
    - Automatic name generation from component class name
    - Description extraction from component docstrings

    To use ComponentTool, you first need a Haystack component - either an existing one or a new one you create.
    You can create a ComponentTool from the component by passing the component to the ComponentTool constructor.
    Below is an example of creating a ComponentTool from an existing SerperDevWebSearch component.

    ```python
    from haystack import component, Pipeline
    from haystack.tools import ComponentTool
    from haystack.components.websearch import SerperDevWebSearch
    from haystack.utils import Secret
    from haystack.components.tools.tool_invoker import ToolInvoker
    from haystack.components.generators.chat import OpenAIChatGenerator
    from haystack.dataclasses import ChatMessage

    # Create a SerperDev search component
    search = SerperDevWebSearch(api_key=Secret.from_env_var("SERPERDEV_API_KEY"), top_k=3)

    # Create a tool from the component
    tool = ComponentTool(
        component=search,
        name="web_search",  # Optional: defaults to "serper_dev_web_search"
        description="Search the web for current information on any topic"  # Optional: defaults to component docstring
    )

    # Create pipeline with OpenAIChatGenerator and ToolInvoker
    pipeline = Pipeline()
    pipeline.add_component("llm", OpenAIChatGenerator(model="gpt-4o-mini", tools=[tool]))
    pipeline.add_component("tool_invoker", ToolInvoker(tools=[tool]))

    # Connect components
    pipeline.connect("llm.replies", "tool_invoker.messages")

    message = ChatMessage.from_user("Use the web search tool to find information about Nikola Tesla")

    # Run pipeline
    result = pipeline.run({"llm": {"messages": [message]}})

    print(result)
    ```

    """

    def __init__(
        self,
        component: Component,
        name: Optional[str] = None,
        description: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
        *,
        outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None,
        inputs_from_state: Optional[Dict[str, str]] = None,
        outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None,
    ):
        """
        Create a Tool instance from a Haystack component.

        :param component: The Haystack component to wrap as a tool.
        :param name: Optional name for the tool (defaults to snake_case of component class name).
        :param description: Optional description (defaults to component's docstring).
        :param parameters:
            A JSON schema defining the parameters expected by the Tool.
            Will fall back to the parameters defined in the component's run method signature if not provided.
        :param outputs_to_string:
            Optional dictionary defining how a tool outputs should be converted into a string.
            If the source is provided only the specified output key is sent to the handler.
            If the source is omitted the whole tool result is sent to the handler.
            Example: {
                "source": "docs", "handler": format_documents
            }
        :param inputs_from_state:
            Optional dictionary mapping state keys to tool parameter names.
            Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter.
        :param outputs_to_state:
            Optional dictionary defining how tool outputs map to keys within state as well as optional handlers.
            If the source is provided only the specified output key is sent to the handler.
            Example: {
                "documents": {"source": "docs", "handler": custom_handler}
            }
            If the source is omitted the whole tool result is sent to the handler.
            Example: {
                "documents": {"handler": custom_handler}
            }
        :raises ValueError: If the component is invalid or schema generation fails.
        """
        if not isinstance(component, Component):
            message = (
                f"Object {component!r} is not a Haystack component. "
                "Use ComponentTool only with Haystack component instances."
            )
            raise ValueError(message)

        # A component already wired into a pipeline must not be invoked out-of-band
        # by a tool; require a standalone instance instead.
        if getattr(component, "__haystack_added_to_pipeline__", None):
            msg = (
                "Component has been added to a pipeline and can't be used to create a ComponentTool. "
                "Create ComponentTool from a non-pipeline component instead."
            )
            raise ValueError(msg)

        # Keep the raw `parameters` argument (possibly None) so to_dict() can
        # round-trip exactly what the caller supplied, not the derived schema.
        self._unresolved_parameters = parameters
        # Create the tools schema from the component run method parameters
        tool_schema = parameters or self._create_tool_parameters_schema(component, inputs_from_state or {})

        def component_invoker(**kwargs):
            """
            Invokes the component using keyword arguments provided by the LLM function calling/tool-generated response.

            :param kwargs: The keyword arguments to invoke the component with.
            :returns: The result of the component invocation.
            """
            converted_kwargs = {}
            input_sockets = component.__haystack_input__._sockets_dict
            for param_name, param_value in kwargs.items():
                param_type = input_sockets[param_name].type

                # Check if the type (or list element type) has from_dict
                target_type = get_args(param_type)[0] if get_origin(param_type) is list else param_type
                if hasattr(target_type, "from_dict"):
                    if isinstance(param_value, list):
                        # NOTE(review): non-dict list items are silently dropped here —
                        # items already of the target type are not passed through.
                        param_value = [target_type.from_dict(item) for item in param_value if isinstance(item, dict)]
                    elif isinstance(param_value, dict):
                        param_value = target_type.from_dict(param_value)
                else:
                    # Let TypeAdapter handle both single values and lists
                    type_adapter = TypeAdapter(param_type)
                    param_value = type_adapter.validate_python(param_value)

                converted_kwargs[param_name] = param_value
            logger.debug(f"Invoking component {type(component)} with kwargs: {converted_kwargs}")
            return component.run(**converted_kwargs)

        # Generate a name for the tool if not provided
        if not name:
            class_name = component.__class__.__name__
            # Convert camelCase/PascalCase to snake_case; consecutive capitals
            # (e.g. "HTMLParser") are kept together rather than split per letter.
            name = "".join(
                [
                    "_" + c.lower() if c.isupper() and i > 0 and not class_name[i - 1].isupper() else c.lower()
                    for i, c in enumerate(class_name)
                ]
            ).lstrip("_")

        description = description or component.__doc__ or name

        # Create the Tool instance with the component invoker as the function to be called and the schema
        super().__init__(
            name=name,
            description=description,
            parameters=tool_schema,
            function=component_invoker,
            inputs_from_state=inputs_from_state,
            outputs_to_state=outputs_to_state,
            outputs_to_string=outputs_to_string,
        )
        self._component = component

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the ComponentTool to a dictionary.

        :returns:
            Dictionary with keys "type" (qualified class name) and "data"
            (the serialized component, tool metadata, and any state/string
            mappings with their handlers serialized to import paths).
        """
        serialized_component = component_to_dict(obj=self._component, name=self.name)

        serialized = {
            "component": serialized_component,
            "name": self.name,
            "description": self.description,
            "parameters": self._unresolved_parameters,
            "outputs_to_string": self.outputs_to_string,
            "inputs_from_state": self.inputs_from_state,
            "outputs_to_state": self.outputs_to_state,
        }

        if self.outputs_to_state is not None:
            serialized_outputs = {}
            for key, config in self.outputs_to_state.items():
                # Shallow-copy each config so the live tool's dict (and its
                # callable handler) is not mutated by serialization.
                serialized_config = config.copy()
                if "handler" in config:
                    serialized_config["handler"] = serialize_callable(config["handler"])
                serialized_outputs[key] = serialized_config
            serialized["outputs_to_state"] = serialized_outputs

        if self.outputs_to_string is not None and self.outputs_to_string.get("handler") is not None:
            # Bug fix: previously the whole outputs_to_string dict was replaced by the
            # bare serialized handler string, dropping the optional "source" key and
            # breaking from_dict(), which expects a dict and calls .get("handler") on it.
            # Mirror the outputs_to_state handling: copy the dict and serialize only
            # the "handler" entry.
            serialized_outputs_to_string = self.outputs_to_string.copy()
            serialized_outputs_to_string["handler"] = serialize_callable(self.outputs_to_string["handler"])
            serialized["outputs_to_string"] = serialized_outputs_to_string

        return {"type": generate_qualified_class_name(type(self)), "data": serialized}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Tool":
        """
        Deserializes the ComponentTool from a dictionary.

        :param data: Dictionary produced by :meth:`to_dict`.
        :returns: A reconstructed ComponentTool wrapping the deserialized component.
        """
        # NOTE(review): inner_data aliases data["data"] and is mutated in place below;
        # callers that reuse `data` after this call see the deserialized handlers.
        inner_data = data["data"]
        component_class = import_class_by_name(inner_data["component"]["type"])
        component = component_from_dict(cls=component_class, data=inner_data["component"], name=inner_data["name"])

        if "outputs_to_state" in inner_data and inner_data["outputs_to_state"]:
            deserialized_outputs = {}
            for key, config in inner_data["outputs_to_state"].items():
                deserialized_config = config.copy()
                if "handler" in config:
                    # Restore the callable from its serialized import path.
                    deserialized_config["handler"] = deserialize_callable(config["handler"])
                deserialized_outputs[key] = deserialized_config
            inner_data["outputs_to_state"] = deserialized_outputs

        if (
            inner_data.get("outputs_to_string") is not None
            and inner_data["outputs_to_string"].get("handler") is not None
        ):
            inner_data["outputs_to_string"]["handler"] = deserialize_callable(
                inner_data["outputs_to_string"]["handler"]
            )

        return cls(
            component=component,
            name=inner_data["name"],
            description=inner_data["description"],
            parameters=inner_data.get("parameters", None),
            outputs_to_string=inner_data.get("outputs_to_string", None),
            inputs_from_state=inner_data.get("inputs_from_state", None),
            outputs_to_state=inner_data.get("outputs_to_state", None),
        )

    def _create_tool_parameters_schema(self, component: Component, inputs_from_state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Creates an OpenAI tools schema from a component's run method parameters.

        :param component: The component to create the schema from.
        :param inputs_from_state: Mapping of state keys to tool parameter names; any
            input listed here is filled from state, so it is excluded from the schema.
        :raises SchemaGenerationError: If schema generation fails
        :returns: OpenAI tools schema for the component's run method parameters.
        """
        properties = {}
        required = []

        param_descriptions = self._get_param_descriptions(component.run)

        for input_name, socket in component.__haystack_input__._sockets_dict.items():  # type: ignore[attr-defined]
            # State-provided inputs are not exposed to the LLM.
            if inputs_from_state is not None and input_name in inputs_from_state:
                continue
            input_type = socket.type
            description = param_descriptions.get(input_name, f"Input '{input_name}' for the component.")

            try:
                property_schema = self._create_property_schema(input_type, description)
            except Exception as e:
                raise SchemaGenerationError(
                    f"Error processing input '{input_name}': {e}. "
                    f"Schema generation supports basic types (str, int, float, bool, dict), dataclasses, "
                    f"and lists of these types as input types for component's run method."
                ) from e

            properties[input_name] = property_schema

            # Use socket.is_mandatory to check if the input is required
            if socket.is_mandatory:
                required.append(input_name)

        parameters_schema = {"type": "object", "properties": properties}

        if required:
            parameters_schema["required"] = required

        return parameters_schema

    @staticmethod
    def _get_param_descriptions(method: Callable) -> Dict[str, str]:
        """
        Extracts parameter descriptions from the method's docstring using docstring_parser.

        :param method: The method to extract parameter descriptions from.
        :returns: A dictionary mapping parameter names to their descriptions.
        """
        docstring = getdoc(method)
        if not docstring:
            return {}

        # Fail fast with an install hint if docstring-parser is missing.
        docstring_parser_import.check()
        parsed_doc = parse(docstring)
        param_descriptions = {}
        for param in parsed_doc.params:
            if not param.description:
                logger.warning(
                    "Missing description for parameter '%s'. Please add a description in the component's "
                    "run() method docstring using the format ':param %%s: <description>'. "
                    "This description helps the LLM understand how to use this parameter." % param.arg_name
                )
            param_descriptions[param.arg_name] = param.description.strip() if param.description else ""
        return param_descriptions

    @staticmethod
    def _is_nullable_type(python_type: Any) -> bool:
        """
        Checks if the type is a Union with NoneType (i.e., Optional).

        :param python_type: The Python type to check.
        :returns: True if the type is a Union with NoneType, False otherwise.
        """
        origin = get_origin(python_type)
        if origin is Union:
            return type(None) in get_args(python_type)
        return False

    def _create_list_schema(self, item_type: Any, description: str) -> Dict[str, Any]:
        """
        Creates a schema for a list type.

        :param item_type: The type of items in the list.
        :param description: The description of the list.
        :returns: A dictionary representing the list schema.
        """
        items_schema = self._create_property_schema(item_type, "")
        # The item schema's (empty) description is redundant inside "items".
        items_schema.pop("description", None)
        return {"type": "array", "description": description, "items": items_schema}

    def _create_dataclass_schema(self, python_type: Any, description: str) -> Dict[str, Any]:
        """
        Creates a schema for a dataclass.

        :param python_type: The dataclass type.
        :param description: The description of the dataclass.
        :returns: A dictionary representing the dataclass schema.
        """
        schema = {"type": "object", "description": description, "properties": {}}
        # Accept both a dataclass type and a dataclass instance.
        cls = python_type if isinstance(python_type, type) else python_type.__class__
        for field in fields(cls):
            field_description = f"Field '{field.name}' of '{cls.__name__}'."
            if isinstance(schema["properties"], dict):
                schema["properties"][field.name] = self._create_property_schema(field.type, field_description)
        return schema

    @staticmethod
    def _create_basic_type_schema(python_type: Any, description: str) -> Dict[str, Any]:
        """
        Creates a schema for a basic Python type.

        :param python_type: The Python type.
        :param description: The description of the type.
        :returns: A dictionary representing the basic type schema.
        """
        type_mapping = {str: "string", int: "integer", float: "number", bool: "boolean", dict: "object"}
        # Unknown types fall back to "string" rather than failing.
        return {"type": type_mapping.get(python_type, "string"), "description": description}

    def _create_property_schema(self, python_type: Any, description: str, default: Any = None) -> Dict[str, Any]:
        """
        Creates a property schema for a given Python type, recursively if necessary.

        :param python_type: The Python type to create a property schema for.
        :param description: The description of the property.
        :param default: The default value of the property.
        :returns: A dictionary representing the property schema.
        :raises SchemaGenerationError: If schema generation fails, e.g., for unsupported types like Pydantic v2 models
        """
        nullable = self._is_nullable_type(python_type)
        if nullable:
            # Unwrap Optional[X] to X; if only NoneType remains, fall back to str.
            non_none_types = [t for t in get_args(python_type) if t is not type(None)]
            python_type = non_none_types[0] if non_none_types else str

        origin = get_origin(python_type)
        if origin is list:
            # Unparameterized List defaults to items of Any.
            schema = self._create_list_schema(get_args(python_type)[0] if get_args(python_type) else Any, description)
        elif is_dataclass(python_type):
            schema = self._create_dataclass_schema(python_type, description)
        elif hasattr(python_type, "model_validate"):
            # Pydantic models are detected via their model_validate attribute.
            raise SchemaGenerationError(
                f"Pydantic models (e.g. {python_type.__name__}) are not supported as input types for "
                f"component's run method."
            )
        else:
            schema = self._create_basic_type_schema(python_type, description)

        if default is not None:
            schema["default"] = default

        return schema
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc