• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IBM / unitxt / 14293170121

06 Apr 2025 01:46PM UTC coverage: 80.205% (-0.01%) from 80.217%
14293170121

push

github

web-flow
Update version to 1.22.0 (#1717)

Signed-off-by: elronbandel <elronbandel@gmail.com>

1582 of 1966 branches covered (80.47%)

Branch coverage included in aggregate %.

9905 of 12356 relevant lines covered (80.16%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.33
src/unitxt/utils.py
1
import copy
1✔
2
import functools
1✔
3
import importlib.util
1✔
4
import json
1✔
5
import logging
1✔
6
import os
1✔
7
import random
1✔
8
import re
1✔
9
import threading
1✔
10
import time
1✔
11
from collections import OrderedDict
1✔
12
from functools import lru_cache
1✔
13
from typing import Any, Dict
1✔
14
from urllib.error import HTTPError as UrllibHTTPError
1✔
15

16
from requests.exceptions import ConnectionError, HTTPError
1✔
17
from requests.exceptions import Timeout as TimeoutError
1✔
18

19
from .settings_utils import get_settings
1✔
20
from .text_utils import is_made_of_sub_strings
1✔
21

22
settings = get_settings()
1✔
23

24
def retry_connection_with_exponential_backoff(max_retries=None,
                                  retry_exceptions=(ConnectionError, TimeoutError, HTTPError, FileNotFoundError, UrllibHTTPError),
                                  backoff_factor=1):
    """Decorator that implements retry with exponential backoff for network operations.

    Also handles errors that were triggered by the specified retry exceptions,
    whether they're direct causes or part of the exception context.

    Args:
        max_retries: Maximum number of retry attempts (falls back to settings if None)
        retry_exceptions: Tuple of exceptions that should trigger a retry
        backoff_factor: Base delay factor in seconds for backoff calculation

    Returns:
        The decorated function with retry logic

    Raises:
        ValueError: If the resolved retry count is not a positive integer.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Get max_retries from settings if not provided
            retries = max_retries if max_retries is not None else settings.max_connection_retries

            for attempt in range(retries):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    # Walk the exception chain looking for a retryable cause:
                    # __cause__ comes from "raise X from Y", __context__ from an
                    # implicit re-raise inside an except block.
                    should_retry = False
                    current_exc = e

                    visited_exceptions = set()  # To prevent infinite loops in rare cyclic exception references

                    while current_exc is not None and id(current_exc) not in visited_exceptions:
                        visited_exceptions.add(id(current_exc))

                        if isinstance(current_exc, retry_exceptions):
                            should_retry = True
                            break

                        # Prefer the explicit cause over the implicit context
                        if current_exc.__cause__ is not None:
                            current_exc = current_exc.__cause__
                        elif current_exc.__context__ is not None:
                            current_exc = current_exc.__context__
                        else:
                            # No more causes in the chain
                            break

                    if not should_retry:
                        # Not a retry exception or caused by a retry exception, so re-raise
                        raise

                    if attempt >= retries - 1:  # Last attempt
                        raise  # Re-raise the last exception

                    # Exponential backoff with jitter to avoid synchronized retries
                    wait_time = backoff_factor * (2 ** attempt) + random.uniform(0, 1)
                    # Lazy %-formatting: only rendered if the warning is emitted
                    logging.warning(
                        "%s failed (attempt %d/%d). Retrying in %.2fs. Error: %s",
                        func.__name__, attempt + 1, retries, wait_time, e,
                    )
                    time.sleep(wait_time)

            # Only reachable when retries <= 0, i.e. the loop body never ran
            raise ValueError(
                f"Invalid retry configuration: the number of retries ({retries}) must be a positive integer"
            ) from None
        return wrapper
    return decorator
90

91
class Singleton(type):
    """Metaclass turning every class that uses it into a singleton.

    The first instantiation is cached; every later call returns the cached
    instance, regardless of the arguments passed.
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        try:
            return cls._instances[cls]
        except KeyError:
            instance = super().__call__(*args, **kwargs)
            cls._instances[cls] = instance
            return instance
98

99

100
class LRUCache:
    """An LRU (Least Recently Used) cache that stores a limited number of items.

    This cache automatically removes the least recently used item when it
    exceeds its max size. It behaves similarly to a dictionary, allowing
    items to be added and accessed using `[]` syntax.

    This implementation is thread-safe, using a lock to ensure that only one
    thread can modify or access the cache at any time.

    Args:
        max_size (int):
            The maximum number of items to store in the cache.
            Items exceeding this limit are automatically removed based on least
            recent usage.
    """

    def __init__(self, max_size=10):
        self._max_size = max_size
        self._cache = OrderedDict()
        self._lock = threading.Lock()  # Lock to ensure thread safety

    def _evict_to_size(self):
        # Drop least-recently-used entries until the size bound holds.
        # Must be called with self._lock held (the lock is not reentrant).
        while len(self._cache) > self._max_size:
            self._cache.popitem(last=False)

    def _store(self, key, value):
        # Insert/overwrite key as the most recently used entry, then evict.
        # Must be called with self._lock held.
        if key in self._cache:
            self._cache.move_to_end(key)  # Refresh recency order
        self._cache[key] = value
        self._evict_to_size()

    @property
    def max_size(self):
        with self._lock:
            return self._max_size

    @max_size.setter
    def max_size(self, size):
        with self._lock:
            self._max_size = size
            # Shrink immediately if the new bound is below the current size
            self._evict_to_size()

    def __setitem__(self, key, value):
        with self._lock:
            self._store(key, value)

    def set(self, key, value):
        """Sets a key-value pair in the cache."""
        with self._lock:
            self._store(key, value)

    def __getitem__(self, key):
        with self._lock:
            if key not in self._cache:
                raise KeyError(f"{key} not found in cache")
            # Mark as most recently used
            self._cache.move_to_end(key)
            return self._cache[key]

    def get(self, key, default=None):
        """Gets a value from the cache by key, returning `default` if the key is not found."""
        with self._lock:
            if key not in self._cache:
                return default
            self._cache.move_to_end(key)  # Move item to end to mark as recently used
            return self._cache[key]

    def __contains__(self, key):
        with self._lock:
            return key in self._cache

    def __len__(self):
        with self._lock:
            return len(self._cache)

    def __repr__(self):
        with self._lock:
            return f"LRUCache(max_size={self._max_size}, items={list(self._cache.items())})"
186

187

188
def flatten_dict(
    d: Dict[str, Any], parent_key: str = "", sep: str = "_"
) -> Dict[str, Any]:
    """Flattens a nested dictionary into a single level, joining keys with `sep`."""
    flat = {}
    for key, value in d.items():
        compound_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            # Recurse into nested mappings, prefixing with the compound key
            flat.update(flatten_dict(value, compound_key, sep=sep))
        else:
            flat[compound_key] = value

    return flat
200

201

202
@lru_cache(maxsize=None)
def artifacts_json_cache(artifact_path):
    # Memoized wrapper around load_json: each artifact file is parsed at most
    # once per process. NOTE(review): the unbounded cache keeps every parsed
    # artifact alive for the process lifetime — presumably the artifact set is
    # small; confirm if memory use ever becomes a concern.
    return load_json(artifact_path)
205

206

207
def load_json(path):
    """Loads and parses the JSON file at `path`.

    Args:
        path: Filesystem path of the JSON file.

    Returns:
        The parsed JSON content.

    Raises:
        RuntimeError: If the file is not valid JSON; the message includes the
            file content to aid debugging.
    """
    with open(path) as f:
        try:
            return json.load(f)
        except json.decoder.JSONDecodeError as e:
            # Re-open to capture the content for the error message (the first
            # handle was consumed by json.load). Use read() directly: joining
            # readlines() with "\n" would double every newline and garble the
            # reported content.
            with open(path) as f:
                file_content = f.read()
            raise RuntimeError(
                f"Failed to decode json file at '{path}' with file content:\n{file_content}"
            ) from e
217

218

219
def save_to_file(path, data):
    """Writes the string `data` to `path`, followed by a trailing newline."""
    with open(path, "w") as out:
        out.write(data + "\n")
223

224

225
def json_dump(data):
    """Serializes `data` as pretty-printed (4-space indent) JSON without ASCII-escaping."""
    return json.dumps(data, ensure_ascii=False, indent=4)
227

228

229
def is_package_installed(package_name):
    """Check if a package is installed.

    Parameters:
    - package_name (str): The name of the package to check.

    Returns:
    - bool: True if the package is installed, False otherwise.
    """
    # find_spec returns None when no importable distribution provides the name
    return importlib.util.find_spec(package_name) is not None
240

241

242
def is_module_available(module_name):
    """Check if a module is available in the current Python environment.

    Parameters:
    - module_name (str): The name of the module to check.

    Returns:
    - bool: True if the module is available, False otherwise.
    """
    try:
        __import__(module_name)
    except ImportError:
        return False
    return True
256

257

258
def remove_numerics_and_quoted_texts(input_str):
    """Strips numeric literals and quoted string literals from `input_str`.

    Removes, in order: floats, integers, triple-double-quoted strings
    (which may span multiple lines), single-quoted strings, and
    double-quoted strings.
    """
    # Remove floats first to avoid leaving stray periods
    input_str = re.sub(r"\d+\.\d+", "", input_str)

    # Remove integers
    input_str = re.sub(r"\d+", "", input_str)

    # Remove triple-quoted strings BEFORE plain double-quoted ones: otherwise
    # the '".*?"' pattern consumes the quote marks of a multiline triple-quoted
    # string and leaves its newline-containing body behind.
    input_str = re.sub(r'""".*?"""', "", input_str, flags=re.DOTALL)

    # Remove strings in single quotes
    input_str = re.sub(r"'.*?'", "", input_str)

    # Remove strings in double quotes
    return re.sub(r'".*?"', "", input_str)
273

274

275
def safe_eval(expression: str, context: dict, allowed_tokens: list) -> any:
1✔
276
    """Evaluates a given expression in a restricted environment, allowing only specified tokens and context variables.
277

278
    Args:
279
        expression (str): The expression to evaluate.
280
        context (dict): A dictionary mapping variable names to their values, which
281
                        can be used in the expression.
282
        allowed_tokens (list): A list of strings representing allowed tokens (such as
283
                               operators, function names, etc.) that can be used in the expression.
284

285
    Returns:
286
        any: The result of evaluating the expression.
287

288
    Raises:
289
        ValueError: If the expression contains tokens not in the allowed list or context keys.
290

291
    Note:
292
        This function should be used carefully, as it employs `eval`, which can
293
        execute arbitrary code. The function attempts to mitigate security risks
294
        by restricting the available tokens and not exposing built-in functions.
295
    """
296
    allowed_sub_strings = list(context.keys()) + allowed_tokens
1✔
297
    if is_made_of_sub_strings(
1✔
298
        remove_numerics_and_quoted_texts(expression), allowed_sub_strings
299
    ):
300
        return eval(expression, {"__builtins__": {}}, context)
1✔
301
    raise ValueError(
1✔
302
        f"The expression '{expression}' can not be evaluated because it contains tokens outside the allowed list of {allowed_sub_strings}."
303
    )
304

305

306
def import_module_from_file(file_path):
    """Dynamically imports the Python file at `file_path` and returns the resulting module."""
    # Module name = file name without its extension
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    # Build a spec for the file, materialize a module object from it, then
    # execute the file's code inside that module.
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
316

317

318
def deep_copy(obj):
    """Returns a fully independent (deep) copy of `obj`.

    Nested containers and the objects inside them are all duplicated, so
    mutating the copy never affects the original.
    """
    return copy.deepcopy(obj)
328

329

330
def shallow_copy(obj):
    """Returns a one-level (shallow) copy of `obj`.

    The top-level container is duplicated, but the objects it contains are
    shared with the original.
    """
    return copy.copy(obj)
340

341

342
def recursive_copy(obj, internal_copy=None):
    """Recursively copies an object with a selective copy method.

    Containers (`dict`, `list`, `tuple`, and named tuples) are rebuilt with
    recursively copied contents; every other value is passed through
    `internal_copy`, or returned as is when no `internal_copy` is given.

    Args:
        obj: The object to be copied.
        internal_copy (callable, optional): The copy function applied to
            non-container objects. If `None`, such objects are returned as is.

    Returns:
        The recursively copied object.
    """
    if isinstance(obj, dict):
        # Rebuild with the same mapping type, copying values recursively
        copied_items = {k: recursive_copy(v, internal_copy) for k, v in obj.items()}
        return type(obj)(copied_items)

    if isinstance(obj, tuple) and hasattr(obj, "_fields"):
        # Named tuples are reconstructed from positional fields
        return type(obj)(*(recursive_copy(field, internal_copy) for field in obj))

    if isinstance(obj, (tuple, list)):
        return type(obj)(recursive_copy(element, internal_copy) for element in obj)

    # Leaf value: pass through, or copy with the supplied strategy
    return obj if internal_copy is None else internal_copy(obj)
375

376

377
def recursive_deep_copy(obj):
    """Recursively copies `obj`, deep-copying every non-container leaf.

    Containers are rebuilt by `recursive_copy` while leaves are duplicated
    with `deep_copy`.

    Args:
        obj: The object to be deep copied.

    Returns:
        A recursively deep-copied version of the original object.
    """
    return recursive_copy(obj, deep_copy)
389

390

391
def recursive_shallow_copy(obj):
    """Recursively copies `obj`, shallow-copying every non-container leaf.

    Containers are rebuilt by `recursive_copy` while leaves are duplicated
    with `shallow_copy`.

    Args:
        obj: The object to be shallow copied.

    Returns:
        A recursively shallow-copied version of the original object.
    """
    return recursive_copy(obj, shallow_copy)
403

404

405
class LongString(str):
    """A `str` whose `repr` can be overridden with a short placeholder.

    The string behaves normally everywhere, but `repr()` returns `repr_str`
    when one was provided — useful for keeping logs and tracebacks readable
    when the value itself is very long.
    """

    def __new__(cls, value, *, repr_str=None):
        obj = super().__new__(cls, value)
        obj._repr_str = repr_str  # Custom repr text, or None for default behavior
        return obj

    def __repr__(self):
        # getattr with a default: instances can be created through str paths
        # that bypass __new__ (e.g. str.__new__ or unpickling), which would
        # leave _repr_str unset and make a direct attribute read raise.
        custom = getattr(self, "_repr_str", None)
        if custom is not None:
            return custom
        return super().__repr__()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc