• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IBM / unitxt / 15880205261

25 Jun 2025 03:12PM UTC coverage: 79.77% (+0.06%) from 79.708%
15880205261

push

github

web-flow
Improved error messages (#1838)

* initial

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Improve error messages

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Fix error

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Fix ruff

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Add more contextual error information

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Fix all tests to pass

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Fix some more tests

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Fix another test

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Fix some error and add contexts

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Fix some tests

Signed-off-by: elronbandel <elronbandel@gmail.com>

* Update inference tests

Signed-off-by: elronbandel <elronbandel@gmail.com>

---------

Signed-off-by: elronbandel <elronbandel@gmail.com>
Co-authored-by: Yoav Katz <68273864+yoavkatz@users.noreply.github.com>

1722 of 2141 branches covered (80.43%)

Branch coverage included in aggregate %.

10699 of 13430 relevant lines covered (79.66%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.77
src/unitxt/error_utils.py
1
import re
1✔
2
from contextlib import contextmanager
1✔
3
from typing import Any, Optional
1✔
4

5
from .logging_utils import get_logger
1✔
6
from .settings_utils import get_constants
1✔
7

8
# Project constants; `constants.version` and `constants.python` are read when building error context.
constants = get_constants()
1✔
9
# Logger through which UnitxtWarning emits its message.
logger = get_logger()
1✔
10

11

12
class Documentation:
    """Locations of the online documentation referenced by errors and warnings.

    ``URL`` is the base address of the documentation site. Every other
    attribute is a path relative to that base; ``additional_info`` joins the
    two to build the "For more information" suffix of a message.
    """

    # Base address of the documentation site (note the trailing slash).
    URL = "https://www.unitxt.ai/en/latest/"
    # Relative paths of individual documentation pages / anchors.
    HUGGINGFACE_METRICS = "docs/adding_metric.html#adding-a-hugginface-metric"
    ADDING_TASK = "docs/adding_task.html"
    ADDING_TEMPLATE = "docs/adding_template.html"
    POST_PROCESSORS = "docs/adding_template.html#post-processors"
    MULTIPLE_METRICS_OUTPUTS = (
        "docs/adding_metric.html#metric-outputs-with-multiple-metrics"
    )
    EVALUATION = "docs/evaluating_datasets.html"
    BENCHMARKS = "docs/benchmark.html"
    DATA_CLASSIFICATION_POLICY = "docs/data_classification_policy.html"
    CATALOG = "docs/saving_and_loading_from_catalog.html"
    SETTINGS = "docs/settings.html"
26

27

28
def additional_info(path: str) -> str:
    """Build the "For more information" suffix appended to errors and warnings.

    Args:
        path: Documentation path relative to ``Documentation.URL``
            (for example ``Documentation.ADDING_TASK``).

    Returns:
        A string that starts with a newline, points at the full documentation
        URL, and ends with a space and a newline.
    """
    # Documentation.URL already ends with "/", so joining with another "/"
    # used to yield ".../latest//docs/...". Normalize the base before joining.
    base_url = Documentation.URL.rstrip("/")
    return f"\nFor more information: see {base_url}/{path} \n"
30

31

32
class UnitxtError(Exception):
    """Exception raised for Unitxt errors.

    Args:
        message (str): explanation of the error
        additional_info_id (Optional[str]): relative path to additional documentation on web.
            If set, should be one of the path attributes of the ``Documentation``
            class in the error_utils.py file (e.g. ``Documentation.ADDING_TASK``);
            a pointer to that page is appended to the message.
    """

    def __init__(self, message: str, additional_info_id: Optional[str] = None):
        if additional_info_id is None:
            full_message = message
        else:
            # Append the "For more information" pointer to the explanation.
            full_message = message + additional_info(additional_info_id)
        super().__init__(full_message)
45

46

47
class UnitxtWarning:
    """Object to format warning message to log.

    Instantiating the class logs the message immediately through the module
    logger at warning level; nothing is stored on the instance.

    Args:
        message (str): explanation of the warning
        additional_info_id (Optional[str]): relative path to additional documentation on web.
            If set, should be one of the path attributes of the ``Documentation``
            class in the error_utils.py file; a pointer to that page is
            appended to the logged message.
    """

    def __init__(self, message: str, additional_info_id: Optional[str] = None):
        if additional_info_id is None:
            full_message = message
        else:
            full_message = message + additional_info(additional_info_id)
        logger.warning(full_message)
60

61

62
# Title shown in the first row of the error context box.
context_block_title = "🦄 Unitxt Error Context"
1✔
63

64

65
def _visible_length(text: str) -> int:
1✔
66
    import unicodedata
1✔
67

68
    ansi_escape = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]|\x1b\]8;;[^\x1b]*\x1b\\")
1✔
69
    clean_text = ansi_escape.sub("", text)
1✔
70
    width = 0
1✔
71
    for char in clean_text:
1✔
72
        if (
1✔
73
            unicodedata.east_asian_width(char) in ("F", "W")
74
            or 0x1F300 <= ord(char) <= 0x1F9FF
75
        ):
76
            width += 2
1✔
77
        else:
78
            width += 1
1✔
79
    return width
1✔
80

81

82
def _make_object_clickable(
1✔
83
    full_obj_name: str, display_name: Optional[str] = None
84
) -> str:
85
    import os
1✔
86

87
    if display_name is None:
1✔
88
        display_name = full_obj_name.split(".")[-1]
×
89
    if full_obj_name.startswith("unitxt."):
1✔
90
        parts = full_obj_name.split(".")
1✔
91
        if len(parts) >= 2:
1✔
92
            module_path = ".".join(parts[:2])
1✔
93
            doc_url = f"{Documentation.URL}{module_path}.html#{full_obj_name}"
1✔
94
            if (
1✔
95
                os.environ.get("TERM_PROGRAM") in ["iTerm.app", "vscode"]
96
                or os.environ.get("TERMINAL_EMULATOR") == "JetBrains-JediTerm"
97
            ):
98
                return f"\033]8;;{doc_url}\033\\{display_name}\033]8;;\033\\"
×
99
            return f"{display_name} ({doc_url})"
1✔
100
    return display_name
1✔
101

102

103
def _get_existing_context(error: Exception):
1✔
104
    """Extract existing context from an error if it exists."""
105
    if hasattr(error, "__error_context__"):
1✔
106
        existing = error.__error_context__
1✔
107
        return (
1✔
108
            existing["original_message"],
109
            existing["context_object"],
110
            existing["context"],
111
        )
112
    return str(error), None, {}
1✔
113

114

115
def _format_object_context(obj: Any) -> Optional[str]:
1✔
116
    """Format an object for display in error context."""
117
    if obj is None:
1✔
118
        return None
1✔
119
    if hasattr(obj, "__class__"):
1✔
120
        class_name = obj.__class__.__name__
1✔
121
        module_name = getattr(obj.__class__, "__module__", "")
1✔
122
    else:
123
        obj_type = type(obj)
×
124
        class_name = obj_type.__name__
×
125
        module_name = getattr(obj_type, "__module__", "")
×
126
    if module_name:
1✔
127
        full_name = f"{module_name}.{class_name}"
1✔
128
        clickable_object = _make_object_clickable(full_name, class_name)
1✔
129
        return f"Object: {clickable_object}"
1✔
130
    return f"Object: {class_name}"
×
131

132

133
def _make_clickable_link(url: str) -> str:
1✔
134
    """Create a clickable terminal link."""
135
    import os
1✔
136

137
    if (
1✔
138
        os.environ.get("TERM_PROGRAM") in ["iTerm.app", "vscode"]
139
        or os.environ.get("TERMINAL_EMULATOR") == "JetBrains-JediTerm"
140
    ):
141
        return f"\033]8;;{url}\033\\link\033]8;;\033\\"
×
142
    return url
1✔
143

144

145
def _format_help_context(help_docs) -> list:
1✔
146
    """Format help documentation into context parts."""
147
    parts = []
1✔
148
    if isinstance(help_docs, str):
1✔
149
        parts.append(f"Help: {_make_clickable_link(help_docs)}")
1✔
150
    elif isinstance(help_docs, dict):
×
151
        for label, url in help_docs.items():
×
152
            parts.append(f"Help ({label}): {_make_clickable_link(url)}")
×
153
    elif isinstance(help_docs, list):
×
154
        for item in help_docs:
×
155
            if isinstance(item, dict) and len(item) == 1:
×
156
                label, url = next(iter(item.items()))
×
157
                parts.append(f"Help ({label}): {_make_clickable_link(url)}")
×
158
            elif isinstance(item, str):
×
159
                parts.append(f"Help: {_make_clickable_link(item)}")
×
160
    return parts
1✔
161

162

163
def _build_context_parts(context_object: Any, context: dict) -> list:
    """Assemble the lines shown inside the error context box.

    Line order:
        1. Well-known keys (Python, Unitxt, Stage, Stream, Index, Instance,
           Object, Action), matched case-insensitively; for each, the first
           matching key of ``context`` is used.
        2. A description of ``context_object``, unless an "object" key was
           already emitted in step 1.
        3. Every remaining key of ``context`` except ``"help"``, in dict order.
        4. Help lines from ``context["help"]``, or a link to the documentation
           home page when no ``"help"`` key is present.

    Keys are displayed with underscores replaced by spaces and title-cased;
    ``None`` values are displayed as ``unknown``.
    """

    def _render(key, raw):
        shown = "unknown" if raw is None else raw
        return f"{key.replace('_', ' ').title()}: {shown}"

    priority = (
        "Python",
        "Unitxt",
        "Stage",
        "Stream",
        "Index",
        "Instance",
        "Object",
        "Action",
    )
    lines = []
    emitted = set()

    for wanted in priority:
        match = next((key for key in context if key.lower() == wanted.lower()), None)
        if match is not None:
            lines.append(_render(match, context[match]))
            emitted.add(match)

    object_reported = any(key.lower() == "object" for key in emitted)
    if not object_reported:
        described = _format_object_context(context_object)
        if described:
            lines.append(described)

    # "help" is rendered last by its own formatter, never as a plain key.
    emitted.add("help")
    lines.extend(
        _render(key, raw) for key, raw in context.items() if key not in emitted
    )

    if "help" in context:
        lines.extend(_format_help_context(context["help"]))
    else:
        lines.append(f"Help: {_make_clickable_link(Documentation.URL)}")

    return lines
205

206

207
def _create_context_box(parts: list) -> str:
1✔
208
    """Create a formatted box containing context information."""
209
    if not parts:
1✔
210
        return ""
×
211
    max_width = (
1✔
212
        max(
213
            _visible_length(context_block_title),
214
            max(_visible_length(part) for part in parts),
215
        )
216
        + 4
217
    )
218
    top_line = "┌" + "─" * max_width + "┐"
1✔
219
    bottom_line = "└" + "─" * max_width + "┘"
1✔
220
    lines = [top_line]
1✔
221
    lines.append(
1✔
222
        f"│ {context_block_title}{' ' * (max_width - _visible_length(context_block_title) - 1)}│"
223
    )
224
    lines.append(f"│ {'-' * (max_width - 2)} │")
1✔
225
    for part in parts:
1✔
226
        padding = " " * (max_width - _visible_length(part) - 4)
1✔
227
        lines.append(f"│  - {part}{padding}│")
1✔
228
    lines.append(bottom_line)
1✔
229
    return "\n".join(lines)
1✔
230

231

232
def _store_context_attributes(
1✔
233
    error: Exception, context_object: Any, context: dict, original_message: str
234
):
235
    """Store context information in error attributes."""
236
    error.__error_context__ = {
1✔
237
        "context_object": context_object,
238
        "context": context,
239
        "original_message": original_message,
240
    }
241
    try:
1✔
242
        error.original_error = type(error)(original_message)
1✔
243
    except (TypeError, ValueError):
1✔
244
        error.original_error = Exception(original_message)
1✔
245
    error.context_object = context_object
1✔
246
    error.context = context
1✔
247

248

249
def _add_context_to_exception(
    original_error: Exception, context_object: Any = None, **context
):
    """Add context information to an exception by modifying its message.

    Context already stored on the error by an earlier call is merged with the
    new ``context``; on key clashes the newly supplied value wins. The Unitxt
    and Python versions from ``constants`` are the lowest-priority defaults.
    A context object already stored on the error is kept in preference to the
    one passed here.

    The error is mutated in place: its context attributes are refreshed and
    its ``args`` become a single string consisting of the context box followed
    by the original message.

    Args:
        original_error: The exception to decorate.
        context_object: Object associated with the failure (optional).
        **context: Additional key/value pairs to show in the context box.
    """
    original_message, existing_object, existing_context = _get_existing_context(
        original_error
    )
    # Compare against None rather than relying on truthiness: a context object
    # that evaluates as false (e.g. one defining __len__ or __bool__) is still
    # a valid, previously recorded object and must not be replaced.
    final_context_object = (
        existing_object if existing_object is not None else context_object
    )
    final_context = {
        "Unitxt": constants.version,
        "Python": constants.python,
        **existing_context,
        **context,
    }
    context_parts = _build_context_parts(final_context_object, final_context)
    context_message = _create_context_box(context_parts)
    _store_context_attributes(
        original_error, final_context_object, final_context, original_message
    )
    if context_parts:
        formatted_message = f"\n{context_message}\n\n{original_message}"
        original_error.args = (formatted_message,)
    else:
        original_error.args = (original_message,)
273

274

275
@contextmanager
def error_context(context_object: Any = None, **context):
    """Decorate exceptions raised inside the ``with`` body with context, then re-raise them.

    Only ``Exception`` subclasses are intercepted. The same exception instance
    is re-raised after its message has been extended, so callers can keep
    catching the original exception type.

    Args:
        context_object: The object being processed (optional).
        **context: Arbitrary key-value pairs that help locate the failure.
            They are listed in the context box prepended to the error message.

            Special context keys:
            - help: Documentation links to help with the error.
              Can be a string (single URL), dict (label: URL), or list of URLs/dicts.

    Examples:
        with error_context(self, operation="validation", item_id=42):
            result = process_item(item)

        with error_context(operation="schema_validation", help="https://docs.example.com/schema"):
            validate_schema(data)

        with error_context(processor, step="preprocessing", batch_size=32):
            results = process_batch(batch)
    """
    try:
        yield
    except Exception as exc:
        _add_context_to_exception(exc, context_object, **context)
        raise
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc