• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 10112264105

26 Jul 2024 01:40PM UTC coverage: 90.045% (-0.001%) from 90.046%
10112264105

Pull #8095

github

web-flow
Merge e16cefc3a into 47f4db869
Pull Request #8095: fix: Fix issue that could lead to RCE if using unsecure Jinja templates

6793 of 7544 relevant lines covered (90.05%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.28
haystack/components/converters/output_adapter.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import ast
1✔
6
import contextlib
1✔
7
from typing import Any, Callable, Dict, Optional, Set
1✔
8

9
import jinja2.runtime
1✔
10
from jinja2 import TemplateSyntaxError, meta
1✔
11
from jinja2.sandbox import SandboxedEnvironment
1✔
12
from typing_extensions import TypeAlias
1✔
13

14
from haystack import component, default_from_dict, default_to_dict
1✔
15
from haystack.utils import deserialize_callable, deserialize_type, serialize_callable, serialize_type
1✔
16

17

18
class OutputAdaptationException(Exception):
    """Signals that adapting a component's output through its template failed."""
20

21

22
@component
class OutputAdapter:
    """
    Adapts output of a Component using Jinja templates.

    The template is rendered in a sandboxed Jinja environment. The rendered string is then
    passed through `ast.literal_eval`, so templates that render a Python literal (list, dict,
    number, ...) produce that object; anything else is returned as the rendered string.

    Usage example:
    ```python
    from haystack import Document
    from haystack.components.converters import OutputAdapter

    adapter = OutputAdapter(template="{{ documents[0].content }}", output_type=str)
    documents = [Document(content="Test content")]
    result = adapter.run(documents=documents)

    assert result["output"] == "Test content"
    ```
    """

    def __init__(self, template: str, output_type: TypeAlias, custom_filters: Optional[Dict[str, Callable]] = None):
        """
        Create an OutputAdapter component.

        :param template:
            A Jinja template that defines how to adapt the input data.
            The variables in the template define the input of this instance.
            e.g.
            With this template:
            ```
            {{ documents[0].content }}
            ```
            The Component input will be `documents`.
        :param output_type:
            The type of output this instance will return.
        :param custom_filters:
            A dictionary of custom Jinja filters used in the template.
        :raises ValueError:
            If `template` is not a syntactically valid Jinja template.
        """
        # Copy so later changes to the caller's dictionary do not leak into this instance.
        self.custom_filters = {**(custom_filters or {})}

        # The sandboxed environment is kept on the instance: it is used here to validate the
        # template and discover its variables, and again in `run` to render it.
        # StrictUndefined makes missing variables raise instead of rendering as empty text.
        self._env = SandboxedEnvironment(undefined=jinja2.runtime.StrictUndefined)
        try:
            self._env.parse(template)  # Validate template syntax
        except TemplateSyntaxError as e:
            raise ValueError(f"Invalid Jinja template '{template}': {e}") from e
        self.template = template

        # Make the custom filters available to the environment.
        self._env.filters.update(self.custom_filters)

        # Every undeclared variable of the template becomes an input of the component.
        input_names: Set[str] = self._extract_variables(self._env)
        component.set_input_types(self, **{var: Any for var in input_names})
        component.set_output_types(self, **{"output": output_type})
        self.output_type = output_type

    def run(self, **kwargs):
        """
        Renders the Jinja template with the provided inputs.

        :param kwargs:
            Must contain all variables used in the `template` string.
        :returns:
            A dictionary with the following keys:
            - `output`: Rendered Jinja template.

        :raises ValueError: If no inputs are provided.
        :raises OutputAdaptationException: If template rendering fails.
        """
        if not kwargs:
            raise ValueError("No input data provided for output adaptation")

        # Re-register the filters so the environment reflects the current content of
        # `custom_filters`, even if the attribute was modified after construction.
        self._env.filters.update(self.custom_filters)

        try:
            output_result = self._env.from_string(self.template).render(**kwargs)
            # NOTE(review): `render` is expected to return a string here, so this guard is
            # presumably only a safety net - confirm before removing.
            if isinstance(output_result, jinja2.runtime.Undefined):
                raise OutputAdaptationException(f"Undefined variable in the template {self.template}; kwargs: {kwargs}")

            # We suppress the exception in case the output is already a string, otherwise
            # we try to evaluate it and would fail.
            # This must be done cause the output could be different literal structures.
            # This doesn't support any user types.
            with contextlib.suppress(Exception):
                output_result = ast.literal_eval(output_result)
        except OutputAdaptationException:
            # Already the right exception type: propagate as-is instead of wrapping it again.
            raise
        except Exception as e:
            raise OutputAdaptationException(f"Error adapting {self.template} with {kwargs}: {e}") from e

        return {"output": output_result}

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        se_filters = {name: serialize_callable(filter_func) for name, filter_func in self.custom_filters.items()}
        return default_to_dict(
            self, template=self.template, output_type=serialize_type(self.output_type), custom_filters=se_filters
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "OutputAdapter":
        """
        Deserializes the component from a dictionary.

        :param data:
            The dictionary to deserialize from.
        :returns:
            The deserialized component.
        """
        # Work on shallow copies so the caller's dictionary is left untouched.
        init_params = dict(data.get("init_parameters", {}))
        init_params["output_type"] = deserialize_type(init_params["output_type"])

        # `custom_filters` may be missing or explicitly `None`; only rebuild it when there
        # is something to deserialize.
        custom_filters = init_params.get("custom_filters")
        if custom_filters:
            init_params["custom_filters"] = {
                name: deserialize_callable(filter_func) if filter_func else None
                for name, filter_func in custom_filters.items()
            }
        return default_from_dict(cls, {**data, "init_parameters": init_params})

    def _extract_variables(self, env: SandboxedEnvironment) -> Set[str]:
        """
        Extracts the undeclared variables of this instance's template.

        :param env: The Jinja environment used to parse `self.template`.
        :return: A set of variable names the template expects to receive.
        """
        # Named `parsed_template` rather than `ast` to avoid shadowing the `ast` module.
        parsed_template = env.parse(self.template)
        return meta.find_undeclared_variables(parsed_template)
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc