• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IBM / unitxt / 16328536129

16 Jul 2025 07:29PM UTC coverage: 81.326% (+0.08%) from 81.249%
16328536129

Pull #1861

github

web-flow
Merge 5fe786c62 into 098653cc0
Pull Request #1861: Fix compatibility with datasets 4.0

1579 of 1949 branches covered (81.02%)

Branch coverage included in aggregate %.

10689 of 13136 relevant lines covered (81.37%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.63
src/unitxt/settings_utils.py
1
import asyncio
1✔
2
import importlib.metadata
1✔
3
import importlib.util
1✔
4
import os
1✔
5
import sys
1✔
6
import threading
1✔
7
from contextlib import contextmanager
1✔
8
from contextvars import ContextVar
1✔
9

10
from .version import version
1✔
11

12

13
def cast_to_type(value, value_type):
    """Convert ``value`` to ``value_type`` (one of ``bool``, ``int``, ``float``).

    Args:
        value: The raw value, typically a string read from an environment
            variable or a value assigned by user code.
        value_type: The target type; must be exactly ``bool``, ``int`` or
            ``float``.

    Returns:
        ``value`` converted to ``value_type``. For ``bool`` the result is
        always a real ``bool`` instance.

    Raises:
        ValueError: If ``value_type`` is ``bool`` and ``value`` is not one of
            ``"True"``, ``"False"``, ``True``, ``False`` (or something that
            compares equal to them), if ``int()``/``float()`` reject the
            value, or if ``value_type`` is not supported.
    """
    if value_type is bool:
        if value not in ["True", "False", True, False]:
            raise ValueError(
                f"Value must be in ['True', 'False', True, False] got {value}"
            )
        if isinstance(value, str):
            return value == "True"
        # 0 and 1 compare equal to False and True, so they pass the membership
        # check above; normalise them so callers always receive a genuine bool.
        return bool(value)
    if value_type is int:
        return int(value)
    if value_type is float:
        return float(value)

    raise ValueError("Unsupported type.")
30

31

32
class Settings:
    """Singleton holding runtime settings with layered lookup.

    Values live in the class-level ``_settings`` dict; ``_types`` records the
    declared type of settings that were registered with a ``(type, value)``
    tuple. Reading an attribute consults, in order: the ``UNITXT_<NAME>``
    environment variable, the active context overrides (innermost first) and
    finally the global values. Context overrides are kept in a ``ContextVar``
    when ``asyncio.current_task()`` does not raise, and in a
    ``threading.local`` otherwise.
    """

    _instance = None
    _settings = {}
    _types = {}
    _logger = None
    _thread_local = threading.local()
    _context_settings = ContextVar("settings", default=None)

    @classmethod
    def is_uninitilized(cls):
        """Return True while no instance has been created yet."""
        return cls._instance is None

    def __new__(cls):
        """Create the single instance on first use and return it afterwards."""
        if not cls.is_uninitilized():
            return cls._instance
        cls._instance = super().__new__(cls)
        return cls._instance

    def _is_async_context(self):
        """Report whether ``asyncio.current_task()`` can be called here."""
        try:
            asyncio.current_task()
        except RuntimeError:
            return False
        return True

    def _get_context_stack(self):
        """Return the list of override dicts that applies to the caller."""
        if self._is_async_context():
            async_frames = self._context_settings.get()
            if async_frames is None:
                return []
            return async_frames

        local = self._thread_local
        try:
            return local.stack
        except AttributeError:
            # First access from this thread: start with an empty stack.
            local.stack = []
            return local.stack

    def __setattr__(self, key, value):
        """Store a setting, honouring declared types and active contexts.

        A two-element tuple is read as ``(type, value)`` and registers the
        type for ``key``. Inside an active context the innermost override
        dict is updated; otherwise the global value is replaced.

        Raises:
            AttributeError: If ``key`` ends with ``_key`` or names one of the
                protected attributes.
            ValueError: If a ``(type, value)`` tuple names a type other than
                ``int``, ``float`` or ``bool``.
        """
        if key.endswith("_key") or key in {"_instance", "_settings"}:
            raise AttributeError(f"Modifying '{key}' is not allowed.")

        if isinstance(value, tuple) and len(value) == 2:
            declared_type, value = value
            if declared_type not in (int, float, bool):
                raise ValueError(
                    f"Setting settings with tuple requires the first element to be either [int, float, bool], got {declared_type}"
                )
            self._types[key] = declared_type

        if value is not None and key in self._types:
            value = cast_to_type(value, self._types[key])

        frames = self._get_context_stack()
        if not frames:
            # No active context: update the global value.
            if key in self._settings and self._logger is not None:
                self._logger.info(
                    f"unitxt.settings.{key} changed: {self._settings[key]} -> {value}"
                )
            self._settings[key] = value
            return

        # An override context is active: only its innermost frame changes.
        frames[-1][key] = value
        if self._logger is not None:
            self._logger.info(
                f"unitxt.settings.{key} (context-local) changed to: {value}"
            )

    def __getattr__(self, key):
        """Resolve a setting that is not a regular attribute.

        ``<name>_key`` yields the environment variable name for ``<name>``.
        Any other name is looked up in the environment, then in the context
        stack (innermost first), then in the global settings.

        Raises:
            AttributeError: If the setting is not found anywhere.
        """
        if key.endswith("_key"):
            return self.environment_variable_key_name(key[: -len("_key")])

        env_value = os.getenv(self.environment_variable_key_name(key))
        if env_value is not None:
            if key in self._types:
                return cast_to_type(env_value, self._types[key])
            return env_value

        for frame in reversed(self._get_context_stack()):
            if key in frame:
                return frame[key]

        if key not in self._settings:
            raise AttributeError(f"'{key}' not found")
        return self._settings[key]

    def environment_variable_key_name(self, key):
        """Return the environment variable name that overrides ``key``."""
        return f"UNITXT_{key.upper()}"

    def get_all_environment_variables(self):
        """Return the environment variable names of all global settings."""
        return list(map(self.environment_variable_key_name, self._settings))

    @contextmanager
    def context(self, **kwargs):
        """Temporarily override settings for the duration of a ``with`` block.

        Overrides are cast to the declared type of each setting (``None`` is
        passed through) and pushed as a new innermost frame; the frame is
        removed again when the block exits, including on error.
        """
        frame = {
            name: (
                cast_to_type(val, self._types[name])
                if name in self._types and val is not None
                else val
            )
            for name, val in kwargs.items()
        }

        if self._is_async_context():
            # Publish a new list instead of mutating the current one, then
            # restore the previous value through the token on exit.
            outer_frames = self._context_settings.get()
            token = self._context_settings.set([*(outer_frames or []), frame])
            try:
                yield
            finally:
                self._context_settings.reset(token)
            return

        local = self._thread_local
        if not hasattr(local, "stack"):
            local.stack = []
        local.stack.append(frame)
        try:
            yield
        finally:
            self._thread_local.stack.pop()
1✔
165

166

167
class Constants:
    """Singleton registry of write-once values.

    Values are kept in the class-level ``_constants`` dict. Each name can be
    assigned exactly once and is read back through attribute access.
    """

    _instance = None
    _constants = {}

    @classmethod
    def is_uninitilized(cls):
        """Return True while no instance has been created yet."""
        return cls._instance is None

    def __new__(cls):
        """Create the single instance on first use and return it afterwards."""
        if not cls.is_uninitilized():
            return cls._instance
        cls._instance = super().__new__(cls)
        return cls._instance

    def __setattr__(self, key, value):
        """Register ``value`` under ``key``.

        Raises:
            AttributeError: If ``key`` ends with ``_key`` or names one of the
                protected attributes.
            ValueError: If ``key`` has already been registered.
        """
        if key.endswith("_key") or key in {"_instance", "_constants"}:
            raise AttributeError(f"Modifying '{key}' is not allowed.")
        registry = self._constants
        if key in registry:
            raise ValueError("Cannot override constants.")
        registry[key] = value

    def __getattr__(self, key):
        """Return the constant registered under ``key``.

        Raises:
            AttributeError: If no constant with that name exists.
        """
        if key not in self._constants:
            raise AttributeError(f"'{key}' not found")
        return self._constants[key]
×
192

193

194
# Register the default settings once, when the singleton is first created.
# A ``(type, value)`` tuple also records the setting's type, so that values
# assigned later or read from the matching ``UNITXT_<NAME>`` environment
# variable are cast to that type; plain values are stored as given and an
# environment override for them is returned as a raw string.
if Settings.is_uninitilized():
    settings = Settings()
    settings.allow_unverified_code = (bool, False)
    settings.use_only_local_catalogs = (bool, False)
    settings.global_loader_limit = (int, None)
    settings.num_resamples_for_instance_metrics = (int, 1000)
    settings.num_resamples_for_global_metrics = (int, 100)
    settings.max_log_message_size = (int, 100000)
    settings.catalogs = None
    settings.artifactories = None
    settings.default_recipe = "dataset_recipe"
    settings.default_verbosity = "info"
    # NOTE(review): untyped, so UNITXT_USE_EAGER_EXECUTION=False is read back
    # as the truthy string "False". Left untyped because declaring it bool
    # would reject other spellings (e.g. "1") that are accepted today; confirm
    # the intended contract before changing it.
    settings.use_eager_execution = False
    settings.remote_metrics = []
    settings.test_card_disable = (bool, False)
    settings.test_metric_disable = (bool, False)
    settings.metrics_master_key_token = None
    settings.seed = (int, 42)
    settings.skip_artifacts_prepare_and_verify = (bool, False)
    settings.data_classification_policy = None
    settings.mock_inference_mode = (bool, False)
    settings.disable_hf_datasets_cache = (bool, False)
    settings.stream_hf_datasets_by_default = (bool, False)
    settings.loader_cache_size = (int, 25)
    settings.loaders_max_retries = (int, 10)
    settings.task_data_as_text = (bool, True)
    settings.default_provider = "watsonx"
    settings.default_format = None
    settings.hf_offline_datasets_path = None
    settings.hf_offline_metrics_path = None
    settings.hf_offline_models_path = None
    settings.inference_engine_cache_path = "./inference_engine_cache/"
    # Declared as int, like the other numeric limits above, so that an
    # environment override yields an int rather than a string.
    settings.max_connection_retries = (int, 3)
    settings.max_templates_tests_for_card_test = (int, 10)
1✔
228

229
# Register the package constants once, when the singleton is first created.
# Every name can be assigned only a single time.
if Constants.is_uninitilized():
    constants = Constants()

    # Files and folders located next to this module.
    constants.dataset_file = os.path.join(os.path.dirname(__file__), "dataset.py")
    constants.metric_file = os.path.join(os.path.dirname(__file__), "metric.py")
    constants.local_catalog_path = os.path.join(os.path.dirname(__file__), "catalog")

    # Use the catalog of the importable ``unitxt`` package when its location
    # can be resolved; otherwise fall back to the catalog beside this module.
    unitxt_pkg = importlib.util.find_spec("unitxt")
    if not (unitxt_pkg and unitxt_pkg.origin):
        constants.default_catalog_path = constants.local_catalog_path
    else:
        constants.package_dir = os.path.dirname(unitxt_pkg.origin)
        constants.default_catalog_path = os.path.join(constants.package_dir, "catalog")
    constants.catalog_dir = constants.local_catalog_path

    constants.dataset_url = "unitxt/data"
    constants.metric_url = "unitxt/metric"
    constants.version = version
    # Interpreter version formatted as "major.minor.micro".
    constants.python = ".".join(
        str(part)
        for part in (
            sys.version_info.major,
            sys.version_info.minor,
            sys.version_info.micro,
        )
    )
    constants.catalog_hierarchy_sep = "."
    constants.env_local_catalogs_paths_sep = ":"
    constants.non_registered_files = [
        "__init__.py",
        "artifact.py",
        "utils.py",
        "register.py",
        "metric.py",
        "dataset.py",
        "blocks.py",
    ]
    constants.codebase_url = "https://github.com/IBM/unitxt"
    constants.website_url = "https://www.unitxt.org"

    # Reserved stream and field names.
    constants.inference_stream = "__INFERENCE_STREAM__"
    constants.instance_stream = "__INSTANCE_STREAM__"
    constants.image_tag = "unitxt-img"
    constants.demos_pool_field = "_demos_pool_"
    constants.demos_field = "demos"
    constants.instruction_field = "instruction"
    constants.system_prompt_field = "system_prompt"
1✔
267

268

269
def get_settings() -> Settings:
    """Return the shared ``Settings`` singleton."""
    shared_settings = Settings()
    return shared_settings
1✔
271

272

273
def get_constants():
    """Return the shared ``Constants`` singleton."""
    shared_constants = Constants()
    return shared_constants
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc