• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 20373119300

19 Dec 2025 02:33PM UTC coverage: 92.261% (+0.08%) from 92.183%
20373119300

Pull #10255

github

web-flow
Merge 32bfbda05 into 9082c9715
Pull Request #10255: refactor: support PEP604 typing syntax (`X|Y` and `X|None`)

14186 of 15376 relevant lines covered (92.26%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.09
haystack/utils/type_serialization.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import builtins
1✔
6
import importlib
1✔
7
import inspect
1✔
8
import sys
1✔
9
import typing
1✔
10
from threading import Lock
1✔
11
from types import ModuleType, NoneType, UnionType
1✔
12
from typing import Any, Union, get_args
1✔
13

14
from haystack.core.errors import DeserializationError
1✔
15

16
# Module-level lock held by `thread_safe_import` while it calls `importlib.import_module`.
_import_lock = Lock()
1✔
17

18

19
def _is_union_type(target: Any) -> bool:
1✔
20
    """
21
    Check if target is a Union type.
22

23
    This handles both `typing.Union[X, Y]` and `X | Y` syntax from PEP 604,
24
    including parameterized types like `Optional[str]`.
25
    """
26
    if target is Union or target is UnionType:
1✔
27
        return True
1✔
28
    origin = typing.get_origin(target)
1✔
29
    return origin is Union or origin is UnionType
1✔
30

31

32
def _build_pep604_union_type(types: list) -> Any:
1✔
33
    """Build a union type from a list of types using PEP 604 syntax (X | Y)."""
34
    result = types[0]
1✔
35
    for t in types[1:]:
1✔
36
        result = result | t
1✔
37
    return result
1✔
38

39

40
def serialize_type(target: Any) -> str:
1✔
41
    """
42
    Serializes a type or an instance to its string representation, including the module name.
43

44
    This function handles types, instances of types, and special typing objects.
45
    It assumes that non-typing objects will have a '__name__' attribute.
46

47
    :param target:
48
        The object to serialize, can be an instance or a type.
49
    :return:
50
        The string representation of the type.
51
    """
52
    if target is NoneType:
1✔
53
        return "None"
1✔
54

55
    args = get_args(target)
1✔
56

57
    if isinstance(target, UnionType):
1✔
58
        return " | ".join([serialize_type(a) for a in args])
1✔
59

60
    name = getattr(target, "__name__", str(target))
1✔
61
    if name.startswith("typing."):
1✔
62
        name = name[7:]
×
63
    if "[" in name:
1✔
64
        name = name.split("[")[0]
×
65

66
    # Get module name
67
    module = inspect.getmodule(target)
1✔
68
    module_name = ""
1✔
69
    # We omit the module name for builtins to not clutter the output
70
    if module and hasattr(module, "__name__") and module.__name__ != "builtins":
1✔
71
        module_name = f"{module.__name__}"
1✔
72

73
    if args:
1✔
74
        args_str = ", ".join([serialize_type(a) for a in args if a is not NoneType])
1✔
75
        return f"{module_name}.{name}[{args_str}]" if module_name else f"{name}[{args_str}]"
1✔
76

77
    return f"{module_name}.{name}" if module_name else f"{name}"
1✔
78

79

80
def _parse_generic_args(args_str: str) -> list[str]:
    """
    Split the inside of a generic subscript (e.g., "str, dict[str, int]") into individual type strings.

    Only commas at bracket depth zero act as separators, so nested generics stay intact.

    :param args_str: The text between the outermost square brackets of a generic type.
    :returns: A list of the individual, whitespace-stripped argument strings.
    """
    args: list[str] = []
    bracket_count = 0
    segment_start = 0

    for index, char in enumerate(args_str):
        if char == "[":
            bracket_count += 1
        elif char == "]":
            bracket_count -= 1
        elif char == "," and bracket_count == 0:
            # Slice the finished segment instead of accumulating it character by character.
            args.append(args_str[segment_start:index].strip())
            segment_start = index + 1

    remainder = args_str[segment_start:]
    if remainder:
        args.append(remainder.strip())

    return args
1✔
101

102

103
def _parse_pep604_union_args(union_str: str) -> list[str]:
    """
    Split a PEP 604 union string such as "str | int | None" into its member type strings.

    A `|` only separates members when it is not enclosed in square brackets, which keeps
    nested generics like "list[str] | dict[str, int] | None" intact.

    :param union_str: The union string to parse
    :returns: A list of individual type strings
    """
    members: list[str] = []
    depth = 0
    member_start = 0

    for position, symbol in enumerate(union_str):
        if symbol == "[":
            depth += 1
        elif symbol == "]":
            depth -= 1
        elif symbol == "|" and depth == 0:
            members.append(union_str[member_start:position].strip())
            member_start = position + 1

    # A trailing member that is empty or whitespace-only is ignored.
    last_member = union_str[member_start:].strip()
    if last_member:
        members.append(last_member)

    return members
1✔
132

133

134
def deserialize_type(type_str: str) -> Any:  # pylint: disable=too-many-return-statements
    """
    Deserializes a type given its full import path as a string, including nested generic types.

    This function will dynamically import the module if it's not already imported
    and then retrieve the type object from it. It also handles nested generic types like
    `list[dict[int, str]]` and top-level PEP 604 unions like `str | None`.

    :param type_str:
        The string representation of the type's full import path.
    :returns:
        The deserialized type object.
    :raises DeserializationError:
        If the type cannot be deserialized due to missing module or type, or because a generic
        subscript is empty (e.g. `list[]`) or cannot be applied to the main type.
    """
    # Handle PEP 604 union syntax at the top level (e.g., "str | int", "str | None")
    pep604_union_args = _parse_pep604_union_args(type_str)
    if len(pep604_union_args) > 1:
        deserialized_args = [deserialize_type(arg) for arg in pep604_union_args]
        return _build_pep604_union_type(deserialized_args)

    # Handle generics (including Union[X, Y])
    if "[" in type_str and type_str.endswith("]"):
        main_type_str, generics_str = type_str.split("[", 1)
        generics_str = generics_str[:-1]

        main_type = deserialize_type(main_type_str)
        generic_args = [deserialize_type(arg) for arg in _parse_generic_args(generics_str)]

        # An empty subscript would otherwise surface as an IndexError below, which is not
        # the exception this function documents.
        if not generic_args:
            raise DeserializationError(f"Could not deserialize type: {type_str}")

        # Reconstruct
        try:
            return main_type[tuple(generic_args) if len(generic_args) > 1 else generic_args[0]]
        except (TypeError, AttributeError) as e:
            raise DeserializationError(f"Could not apply arguments {generic_args} to type {main_type}") from e

    # Handle non-generic types
    # First, check if there's a module prefix
    if "." in type_str:
        parts = type_str.split(".")
        module_name = ".".join(parts[:-1])
        type_name = parts[-1]

        # Prefer an already imported module; import it only when it is not loaded yet.
        module = sys.modules.get(module_name)
        if module is None:
            try:
                module = thread_safe_import(module_name)
            except ImportError as e:
                raise DeserializationError(f"Could not import the module: {module_name}") from e

        # Get the class from the module
        if hasattr(module, type_name):
            return getattr(module, type_name)

        raise DeserializationError(f"Could not locate the type: {type_name} in the module: {module_name}")

    # No module prefix, check builtins and typing
    # First check builtins
    if hasattr(builtins, type_str):
        return getattr(builtins, type_str)

    # Then check typing
    if hasattr(typing, type_str):
        return getattr(typing, type_str)

    # Special case for NoneType
    if type_str == "NoneType":
        return NoneType

    # Special case for None
    if type_str == "None":
        return None

    raise DeserializationError(f"Could not deserialize type: {type_str}")
×
207

208

209
def thread_safe_import(module_name: str) -> ModuleType:
    """
    Import a module while holding the module-level import lock.

    Concurrent imports from several threads can race with each other; funnelling every
    dynamic import through one shared lock means only one of them runs at a time.

    :param module_name: the module to import
    :returns: the imported module, as returned by `importlib.import_module`
    """
    with _import_lock:
        imported_module = importlib.import_module(module_name)
    return imported_module
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc