• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IBM / unitxt / 13411851455

19 Feb 2025 11:50AM UTC coverage: 80.968% (+0.05%) from 80.917%
13411851455

Pull #1600

github

web-flow
Merge 7196015ea into e24eccb75
Pull Request #1600: small typos in loaders and in profiler

1540 of 1894 branches covered (81.31%)

Branch coverage included in aggregate %.

9674 of 11956 relevant lines covered (80.91%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.63
src/unitxt/utils.py
1
import copy
1✔
2
import importlib.util
1✔
3
import json
1✔
4
import os
1✔
5
import re
1✔
6
import threading
1✔
7
from collections import OrderedDict
1✔
8
from functools import lru_cache
1✔
9
from typing import Any, Dict
1✔
10

11
from .text_utils import is_made_of_sub_strings
1✔
12

13

14
class Singleton(type):
    """Metaclass that hands back one shared instance per class.

    The first call to a class that uses this metaclass constructs the
    instance with the supplied arguments. Every later call returns that
    same object, and the arguments of those later calls are ignored.
    """

    # Maps each class to its single instance; shared by every class that
    # uses this metaclass.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        # Already built: reuse it without touching the constructor again.
        if cls in cls._instances:
            return cls._instances[cls]

        instance = super().__call__(*args, **kwargs)
        cls._instances[cls] = instance
        return instance
1✔
21

22

23
class LRUCache:
    """An LRU (Least Recently Used) cache that stores a limited number of items.

    The cache evicts the least recently used item whenever it holds more than
    ``max_size`` items. It supports dictionary-like access with ``[]`` as well
    as ``get`` / ``set`` methods. Both reading and writing a key mark it as the
    most recently used.

    Every public operation takes an internal lock, so only one thread reads or
    modifies the cache at a time.

    Args:
        max_size (int):
            The maximum number of items to store in the cache.
            Items exceeding this limit are removed in least-recently-used
            order. A value of zero or less keeps the cache empty.
    """

    def __init__(self, max_size=10):
        self._max_size = max_size
        self._cache = OrderedDict()
        self._lock = threading.Lock()  # Guards _cache and _max_size

    def _evict_overflow(self):
        """Drop least recently used items until the cache fits ``_max_size``.

        Must be called with ``self._lock`` held.
        """
        # The emptiness check keeps a negative max_size from calling popitem()
        # on an empty OrderedDict, which would raise KeyError.
        while self._cache and len(self._cache) > self._max_size:
            self._cache.popitem(last=False)

    def _store(self, key, value):
        """Insert or refresh ``key`` as the most recently used item.

        Must be called with ``self._lock`held`` — i.e. with ``self._lock`` held.
        """
        self._cache[key] = value
        # Assigning to an existing key keeps its old position, so move it.
        self._cache.move_to_end(key)
        self._evict_overflow()

    @property
    def max_size(self):
        with self._lock:
            return self._max_size

    @max_size.setter
    def max_size(self, size):
        with self._lock:
            self._max_size = size
            # Shrink immediately if the new limit is below the current size.
            self._evict_overflow()

    def __setitem__(self, key, value):
        with self._lock:
            self._store(key, value)

    def __getitem__(self, key):
        with self._lock:
            if key not in self._cache:
                raise KeyError(f"{key} not found in cache")
            # Mark the accessed item as most recently used.
            self._cache.move_to_end(key)
            return self._cache[key]

    def set(self, key, value):
        """Sets a key-value pair in the cache (same effect as ``cache[key] = value``)."""
        with self._lock:
            self._store(key, value)

    def get(self, key, default=None):
        """Gets a value from the cache by key, returning `default` if the key is not found."""
        with self._lock:
            if key not in self._cache:
                return default
            # Mark the accessed item as most recently used.
            self._cache.move_to_end(key)
            return self._cache[key]

    def __contains__(self, key):
        # Membership tests do not refresh the item's recency.
        with self._lock:
            return key in self._cache

    def __len__(self):
        with self._lock:
            return len(self._cache)

    def __repr__(self):
        with self._lock:
            return f"LRUCache(max_size={self._max_size}, items={list(self._cache.items())})"
×
109

110

111
def flatten_dict(
    d: Dict[str, Any], parent_key: str = "", sep: str = "_"
) -> Dict[str, Any]:
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``sep``. Values that are not dictionaries are kept as they are.

    Args:
        d: The (possibly nested) dictionary to flatten.
        parent_key: Prefix placed before every key of ``d``; empty for no prefix.
        sep: Separator inserted between a prefix and a key.

    Returns:
        A new flat dictionary mapping the joined keys to the leaf values.
    """
    flat = {}
    for key, value in d.items():
        # Only prepend the prefix (and separator) when there is one.
        full_key = key if not parent_key else parent_key + sep + key
        if not isinstance(value, dict):
            flat[full_key] = value
            continue
        for nested_key, nested_value in flatten_dict(value, full_key, sep=sep).items():
            flat[nested_key] = nested_value
    return flat
1✔
123

124

125
@lru_cache(maxsize=None)
def artifacts_json_cache(artifact_path):
    """Load the JSON file at ``artifact_path`` through ``load_json``, memoized per path.

    The cache is unbounded (``maxsize=None``). Once a path has been loaded
    successfully, later calls with the same path return the same cached object
    instead of reading the file again.
    """
    parsed = load_json(artifact_path)
    return parsed
1✔
128

129

130
def load_json(path):
    """Read and parse the JSON file at ``path``.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object decoded from the file.

    Raises:
        RuntimeError: If the file content is not valid JSON. The message
            contains the path and the file content exactly as read, and the
            original ``json.JSONDecodeError`` is attached as the cause.
    """
    # Read the text once so the very same content can be parsed and, on
    # failure, reported verbatim (no second open, no doubled newlines).
    with open(path) as f:
        file_content = f.read()

    try:
        return json.loads(file_content)
    except json.decoder.JSONDecodeError as e:
        raise RuntimeError(
            f"Failed to decode json file at '{path}' with file content:\n{file_content}"
        ) from e
140

141

142
def save_to_file(path, data):
    """Write ``data`` followed by a newline to ``path``, replacing any existing file.

    Args:
        path: Destination file path; the file is opened in text write mode.
        data (str): Text to write.
    """
    with open(path, "w") as out:
        # Two writes in order: the payload, then the terminating newline.
        out.writelines((data, "\n"))
1✔
146

147

148
def json_dump(data):
    """Serialize ``data`` to a JSON string.

    The output is indented by four spaces and non-ASCII characters are written
    as they are rather than as ``\\u`` escapes.
    """
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    return serialized
1✔
150

151

152
def is_package_installed(package_name):
    """Check if a package is installed.

    The check locates the package's import spec. For a dotted name the
    parent packages are imported in the process of looking up the spec.

    Parameters:
    - package_name (str): The name of the package to check.

    Returns:
    - bool: True if the package is installed, False otherwise.
    """
    try:
        return importlib.util.find_spec(package_name) is not None
    except ModuleNotFoundError:
        # find_spec raises instead of returning None when a parent package of
        # a dotted name (e.g. "missing.sub") cannot be found.
        return False
1✔
163

164

165
def is_module_available(module_name):
    """Report whether ``module_name`` can be imported in the current environment.

    The module is actually imported as part of the check.

    Parameters:
    - module_name (str): The name of the module to check.

    Returns:
    - bool: True if the import succeeds, False if it raises ImportError.
    """
    try:
        __import__(module_name)
    except ImportError:
        return False
    return True
1✔
179

180

181
def remove_numerics_and_quoted_texts(input_str):
    """Strip numeric literals and quoted text from ``input_str``.

    The passes run in a fixed order: floats, integers, single-quoted text,
    double-quoted text, and finally triple-double-quoted text (which may span
    several lines).

    Returns:
        The input string with every match of those patterns removed.
    """
    # Order matters: floats go before integers so that no stray periods are
    # left behind, and the single-line quote passes come next.
    single_line_patterns = (
        r"\d+\.\d+",
        r"\d+",
        r"'.*?'",
        r'".*?"',
    )
    stripped = input_str
    for pattern in single_line_patterns:
        stripped = re.sub(pattern, "", stripped)

    # NOTE(review): this pass runs after the double-quote pass has already
    # consumed `""` pairs, so it may rarely find anything left to match;
    # confirm whether that ordering is intended before changing it.
    return re.sub(r'""".*?"""', "", stripped, flags=re.DOTALL)
1✔
196

197

198
def safe_eval(expression: str, context: dict, allowed_tokens: list) -> Any:
    """Evaluates a given expression in a restricted environment, allowing only specified tokens and context variables.

    Before evaluation, numeric literals and quoted texts are stripped from the
    expression and the remainder is checked with ``is_made_of_sub_strings``
    against the context keys plus ``allowed_tokens``.

    Args:
        expression (str): The expression to evaluate.
        context (dict): A dictionary mapping variable names to their values, which
                        can be used in the expression.
        allowed_tokens (list): A collection of strings representing allowed tokens (such as
                               operators, function names, etc.) that can be used in the expression.

    Returns:
        Any: The result of evaluating the expression.

    Raises:
        ValueError: If the expression contains tokens not in the allowed list or context keys.

    Note:
        This function should be used carefully, as it employs `eval`, which can
        execute arbitrary code. The function attempts to mitigate security risks
        by restricting the available tokens and not exposing built-in functions.
    """
    # Unpacking accepts any iterable of tokens (list, tuple, ...), not only lists.
    allowed_sub_strings = [*context, *allowed_tokens]

    checked_text = remove_numerics_and_quoted_texts(expression)
    if not is_made_of_sub_strings(checked_text, allowed_sub_strings):
        raise ValueError(
            f"The expression '{expression}' can not be evaluated because it contains tokens outside the allowed list of {allowed_sub_strings}."
        )

    # SECURITY(review): eval on the raw expression. The token filter above and
    # the empty __builtins__ reduce the attack surface but are not a sandbox;
    # do not pass untrusted expressions without further review.
    return eval(expression, {"__builtins__": {}}, context)
227

228

229
def import_module_from_file(file_path):
    """Load the Python file at ``file_path`` as a module and return it.

    The module is named after the file (base name without its extension) and
    its code is executed as part of loading.

    Args:
        file_path: Path of the Python source file to load.

    Returns:
        The loaded module object.
    """
    # Module name = file name with the directory and extension stripped.
    base_name = os.path.basename(file_path)
    module_name, _extension = os.path.splitext(base_name)

    # Build the spec, materialize an empty module from it, then run the code.
    module_spec = importlib.util.spec_from_file_location(module_name, file_path)
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
×
239

240

241
def deep_copy(obj):
    """Return an independent deep copy of ``obj`` made with ``copy.deepcopy``.

    Args:
        obj: The object to duplicate.

    Returns:
        A new object in which nested objects are copied as well.
    """
    duplicate = copy.deepcopy(obj)
    return duplicate
1✔
251

252

253
def shallow_copy(obj):
    """Return a shallow copy of ``obj`` made with ``copy.copy``.

    Args:
        obj: The object to duplicate.

    Returns:
        A new top-level object that still shares its nested objects with ``obj``.
    """
    duplicate = copy.copy(obj)
    return duplicate
1✔
263

264

265
def recursive_copy(obj, internal_copy=None):
    """Recursively copies an object with a selective copy method.

    For `dict`, `list`, and `tuple` types (including named tuples and
    subclasses) it rebuilds a container of the same type whose contents are
    copied recursively. Every other object is passed to `internal_copy`, or
    returned as is when `internal_copy` is `None`.

    Args:
        obj: The object to be copied.
        internal_copy (callable, optional): The copy function to use for non-container objects.
            If `None`, non-container objects are returned as is (not copied).

    Returns:
        The recursively copied object.
    """
    # Handle dictionaries
    if isinstance(obj, dict):
        copied_items = {
            key: recursive_copy(value, internal_copy) for key, value in obj.items()
        }
        dict_type = type(obj)
        if dict_type is dict:
            return copied_items
        try:
            return dict_type(copied_items)
        except TypeError:
            # Some dict subclasses do not accept a mapping as their first
            # argument (e.g. defaultdict expects the default factory there).
            # Clone the container itself, then swap in the copied contents.
            clone = copy.copy(obj)
            clone.clear()
            clone.update(copied_items)
            return clone

    # Handle named tuples (their constructor takes the fields positionally)
    if isinstance(obj, tuple) and hasattr(obj, "_fields"):
        return type(obj)(*(recursive_copy(item, internal_copy) for item in obj))

    # Handle tuples and lists
    if isinstance(obj, (tuple, list)):
        return type(obj)(recursive_copy(item, internal_copy) for item in obj)

    if internal_copy is None:
        return obj

    return internal_copy(obj)
1✔
298

299

300
def recursive_deep_copy(obj):
    """Copy ``obj`` with ``recursive_copy``, deep-copying every non-container object.

    ``deep_copy`` is handed to ``recursive_copy`` as the internal copy function.

    Args:
        obj: The object to be copied.

    Returns:
        The copy produced by ``recursive_copy``.
    """
    return recursive_copy(obj, internal_copy=deep_copy)
1✔
312

313

314
def recursive_shallow_copy(obj):
    """Copy ``obj`` with ``recursive_copy``, shallow-copying every non-container object.

    ``shallow_copy`` is handed to ``recursive_copy`` as the internal copy function.

    Args:
        obj: The object to be copied.

    Returns:
        The copy produced by ``recursive_copy``.
    """
    return recursive_copy(obj, internal_copy=shallow_copy)
1✔
326

327

328
class LongString(str):
    """A ``str`` subclass whose ``repr`` can be overridden.

    Args:
        value: The string content.
        repr_str (optional, keyword-only): Text to return from ``repr()``.
            When ``None``, the regular ``str`` representation is used.
    """

    def __new__(cls, value, *, repr_str=None):
        # str is immutable, so the extra attribute is attached in __new__.
        instance = super().__new__(cls, value)
        instance._repr_str = repr_str
        return instance

    def __repr__(self):
        custom = self._repr_str
        return super().__repr__() if custom is None else custom
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc