• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

abravalheri / validate-pyproject / 6178271622070272

01 May 2026 06:37PM UTC coverage: 97.628% (-0.07%) from 97.694%
6178271622070272

push

cirrus-ci

web-flow
fix: support relative urls in SchemaStore (#306)

**Root cause**: SchemaStore's `pyproject.json` recently switched from absolute URLs to **relative `$ref` URLs** (e.g. `"$ref": "partial-black.json"`). The code was passing these directly to `load_from_uri`, which tried to open them as local files, causing `FileNotFoundError`.

1. **`src/validate_pyproject/remote.py`** — Resolve relative `$ref` URLs using `urllib.parse.urljoin`:
   - When loading tool schemas from the store, resolve `$ref` against the store URL.
   - When loading nested `$ref`s inside a schema's `properties`, resolve against the schema's `$id`.
   - Also fixed a typo in the protocol check (`("https://", "https://")` → `("http://", "https://")`).

2. **`tools/cache_urls_for_tests.py`** — Updated the cache script to:
   - Resolve relative `$ref`s against `SCHEMA_STORE`.
   - Recursively discover and download nested relative refs (so offline tests work too).

3. **Populated `tests/.cache/`** with all required schemas so the tests run offline.


Assisted-by: OpenCode:Kimi-K2.6

Signed-off-by: Henry Schreiner <henryfs@princeton.edu>

418 of 434 branches covered (96.31%)

Branch coverage included in aggregate %.

5 of 5 new or added lines in 1 file covered. (100.0%)

8 existing lines in 2 files now uncovered.

1064 of 1084 relevant lines covered (98.15%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.65
/src/validate_pyproject/api.py
1
"""
2
Retrieve JSON schemas for validating dicts representing a ``pyproject.toml`` file.
3
"""
4

5
from __future__ import annotations
1✔
6

7
import json
1✔
8
import logging
1✔
9
import typing
1✔
10
from collections.abc import Iterator, Mapping, Sequence
1✔
11
from enum import Enum
1✔
12
from functools import partial, reduce
1✔
13
from types import MappingProxyType, ModuleType
1✔
14
from typing import (
1✔
15
    Callable,
16
    TypeVar,
17
)
18

19
import fastjsonschema as FJS
1✔
20

21
from . import _resources, errors, formats
1✔
22
from .error_reporting import detailed_errors
1✔
23
from .extra_validations import EXTRA_VALIDATIONS
1✔
24
from .types import FormatValidationFn, Schema, ValidationFn
1✔
25

26
_logger = logging.getLogger(__name__)
1✔
27

28
if typing.TYPE_CHECKING:  # pragma: no cover
29
    from .plugins import PluginProtocol
30

31

32
__all__ = ["Validator"]
1✔
33

34

35
T = TypeVar("T", bound=Mapping)
1✔
36
AllPlugins = Enum("AllPlugins", "ALL_PLUGINS")  #: :meta private:
1✔
37
ALL_PLUGINS = AllPlugins.ALL_PLUGINS
1✔
38

39
TOP_LEVEL_SCHEMA = "pyproject_toml"
1✔
40
PROJECT_TABLE_SCHEMA = "project_metadata"
1✔
41

42

43
def _get_public_functions(module: ModuleType) -> Mapping[str, FormatValidationFn]:
    """Collect the public callables found in ``module``.

    Every callable value stored in the module namespace is considered. The
    resulting key is the callable's own ``__name__`` with each ``_`` replaced
    by ``-``.

    Callables whose ``__name__`` starts with ``_`` are skipped. Callables that
    do not expose a string ``__name__`` (e.g. :class:`functools.partial`
    objects) are skipped as well instead of raising :class:`AttributeError`.

    :param module: module whose namespace is scanned.
    :return: mapping from dashed name to the corresponding callable.
    """
    public = {}
    for fn in module.__dict__.values():
        if not callable(fn):
            continue
        # Not every callable carries a ``__name__`` (partials, some instances).
        name = getattr(fn, "__name__", None)
        if isinstance(name, str) and not name.startswith("_"):
            public[name.replace("_", "-")] = fn
    return public
49

50

51
FORMAT_FUNCTIONS = MappingProxyType(_get_public_functions(formats))
1✔
52

53

54
def load(name: str, package: str = __package__, ext: str = ".schema.json") -> Schema:
    """Read a JSON Schema file bundled as a package resource and parse it.

    The resource named ``{name}{ext}`` is read from ``package`` and its JSON
    content is wrapped in a ``Schema``.
    The returned dict-like object is immutable.

    :meta private: (low level detail)
    """
    filename = f"{name}{ext}"
    contents = _resources.read_text(package, filename)
    return Schema(json.loads(contents))
1✔
61

62

63
def load_builtin_plugin(name: str) -> Schema:
    """Load the schema ``name`` from this package's ``plugins`` subpackage.

    :meta private: (low level detail)
    """
    plugins_package = f"{__package__}.plugins"
    return load(name, plugins_package)
1✔
66

67

68
class SchemaRegistry(Mapping[str, Schema]):
    """Repository of parsed JSON Schemas used for validating a ``pyproject.toml``.

    During instantiation the schemas equivalent to PEP 517, PEP 518 and PEP 621
    will be combined with the schemas for the ``tool`` subtables provided by the
    plugins.

    Since this object works as a mapping between each schema ``$id`` and the
    schema itself, all schemas provided by plugins **MUST** have a top level
    ``$id``.

    :meta private: (low level detail)
    """

    def __init__(self, plugins: Sequence[PluginProtocol] = ()):
        # Each value is: (which part of the TOML, who defines, schema)
        self._schemas: dict[str, tuple[str, str, Schema]] = {}

        top_level = typing.cast("dict", load(TOP_LEVEL_SCHEMA))  # Make it mutable
        self._spec_version: str = top_level["$schema"]
        top_properties = top_level["properties"]
        tool_properties = top_properties["tool"].setdefault("properties", {})

        # Add PEP 621
        project_table_schema = load(PROJECT_TABLE_SCHEMA)
        self._ensure_compatibility(PROJECT_TABLE_SCHEMA, project_table_schema)
        project_id = project_table_schema["$id"]
        top_level["project"] = {"$ref": project_id}
        self._schemas = {
            project_id: (
                "project",
                f"{__name__} - project metadata",
                project_table_schema,
            )
        }

        # Add tools using Plugins
        for plugin in plugins:
            if not plugin.tool:
                # Plugins without a tool name only contribute an extra schema.
                _logger.info(f"{plugin} defines extra schema {plugin.id}")
                self._schemas[plugin.id] = (plugin.id, plugin.id, plugin.schema)
                continue

            allow_overwrite: str | None = None
            if plugin.tool in tool_properties:
                _logger.warning(f"{plugin} overwrites `tool.{plugin.tool}` schema")
                # Re-using the ``$id`` is acceptable when replacing a tool schema.
                allow_overwrite = plugin.schema.get("$id")
            else:
                _logger.info(f"{plugin} defines `tool.{plugin.tool}` schema")

            compatible = self._ensure_compatibility(
                plugin.tool, plugin.schema, allow_overwrite
            )
            sid = compatible["$id"]
            if plugin.fragment:
                sref = f"{sid}#{plugin.fragment}"
            else:
                sref = sid
            tool_properties[plugin.tool] = {"$ref": sref}
            self._schemas[sid] = (f"tool.{plugin.tool}", plugin.id, plugin.schema)

        self._main_id: str = top_level["$id"]
        root_entry = ("<$ROOT>", f"{__name__} - build metadata", Schema(top_level))
        self._schemas[self._main_id] = root_entry

    @property
    def spec_version(self) -> str:
        """Version of the JSON Schema spec in use"""
        return self._spec_version

    @property
    def main(self) -> str:
        """Top level schema for validating a ``pyproject.toml`` file"""
        return self._main_id

    def _ensure_compatibility(
        self,
        reference: str,
        schema: Schema,
        allow_overwrite: str | None = None,
    ) -> Schema:
        """Check that ``schema`` can be stored in this registry.

        :param reference: name used to identify the schema in error/log messages.
        :param schema: schema to be checked.
        :param allow_overwrite: ``$id`` that may already be registered without
            triggering the duplicate check.
        :return: the already registered schema when an identical one exists
            under the same ``$id``, otherwise ``schema`` itself.
        :raises errors.SchemaMissingId: if ``$id`` is missing or empty.
        :raises errors.SchemaWithDuplicatedId: if a different schema is already
            registered under the same ``$id``.
        :raises errors.InvalidSchemaVersion: if ``$schema`` is given and does
            not match :attr:`spec_version`.
        """
        sid = schema.get("$id")
        if not sid:
            raise errors.SchemaMissingId(reference or "<extra>")

        already_registered = sid in self._schemas
        if already_registered and sid != allow_overwrite:
            _, _, existing = self._schemas[sid]
            if dict(existing) != dict(schema):
                raise errors.SchemaWithDuplicatedId(sid)
            _logger.warning(
                f"Duplicate schema {sid!r} for `tool.{reference}` ignored "
                "(same schema already registered)"
            )
            return existing

        declared = schema.get("$schema")
        if not declared:
            return schema

        expected = self.spec_version
        # Support schemas with missing trailing # (incorrect, but required before 0.15)
        if declared.rstrip("#") == expected.rstrip("#"):
            return schema

        raise errors.InvalidSchemaVersion(reference or sid, declared, expected)

    def __getitem__(self, key: str) -> Schema:
        """Return the schema registered under the ``$id`` given by ``key``."""
        _, _, schema = self._schemas[key]
        return schema

    def __iter__(self) -> Iterator[str]:
        """Iterate over the registered schema ids."""
        return iter(self._schemas)

    def __len__(self) -> int:
        """Number of registered schemas."""
        return len(self._schemas)
×
167

168

169
class RefHandler(Mapping[str, Callable[[str], Schema]]):
    """Resolve every external schema ``$ref`` through the local registry.

    :mod:`fastjsonschema` accepts a dict-like object for loading external
    ``$ref``s. Such an object maps a URI schema (e.g. ``http``, ``https``,
    ``ftp``) to a function that receives the schema URI and returns the schema
    (as parsed JSON); otherwise :mod:`urllib` is used and the URI is assumed to
    be a valid URL.
    This class will ensure all the URIs are loaded from the local registry.

    :meta private: (low level detail)
    """

    def __init__(self, registry: Mapping[str, Schema]):
        self._registry = registry
        self._uri_schemas = ["http", "https"]

    def __contains__(self, key: object) -> bool:
        """Report any string key as present, recording it if it is new."""
        if not isinstance(key, str):
            return False
        if key not in self._uri_schemas:
            self._uri_schemas.append(key)
        return True

    def __iter__(self) -> Iterator[str]:
        """Iterate over the URI schemas recorded so far."""
        return iter(self._uri_schemas)

    def __len__(self) -> int:
        """Number of URI schemas recorded so far."""
        return len(self._uri_schemas)

    def __getitem__(self, key: str) -> Callable[[str], Schema]:
        """All the references should be retrieved from the registry"""
        lookup = self._registry.__getitem__
        return lookup
1✔
199

200

201
class Validator:
    """Callable object that checks a parsed ``pyproject.toml`` against the schemas
    collected from the given plugins.
    """

    _plugins: Sequence[PluginProtocol]

    def __init__(
        self,
        plugins: Sequence[PluginProtocol] | AllPlugins = ALL_PLUGINS,
        format_validators: Mapping[str, FormatValidationFn] = FORMAT_FUNCTIONS,
        extra_validations: Sequence[ValidationFn] = EXTRA_VALIDATIONS,
        *,
        extra_plugins: Sequence[PluginProtocol] = (),
    ):
        # Lazily populated caches
        self._code_cache: str | None = None
        self._cache: ValidationFn | None = None
        self._schema: Schema | None = None

        # Let's make the following options readonly
        self._format_validators = MappingProxyType(format_validators)
        self._extra_validations = tuple(extra_validations)

        if plugins is ALL_PLUGINS:
            # Imported here rather than at module level, only when needed.
            from .plugins import list_from_entry_points

            plugins = list_from_entry_points()

        self._plugins = tuple(plugins) + tuple(extra_plugins)

        self._schema_registry = SchemaRegistry(self._plugins)
        self.handlers = RefHandler(self._schema_registry)

    @property
    def registry(self) -> SchemaRegistry:
        """Registry holding the schemas used by this validator"""
        return self._schema_registry

    @property
    def schema(self) -> Schema:
        """Top level ``pyproject.toml`` JSON Schema"""
        main_id = self._schema_registry.main
        return Schema({"$ref": main_id})

    @property
    def extra_validations(self) -> Sequence[ValidationFn]:
        """List of extra validation functions that run after the JSON Schema check"""
        return self._extra_validations

    @property
    def formats(self) -> Mapping[str, FormatValidationFn]:
        """Mapping between JSON Schema formats and functions that validate them"""
        return self._format_validators

    @property
    def generated_code(self) -> str:
        """Result of ``fastjsonschema.compile_to_code`` for :attr:`schema`
        (computed on first access and cached afterwards).
        """
        if self._code_cache is not None:
            return self._code_cache

        format_map = dict(self.formats)
        self._code_cache = FJS.compile_to_code(
            self.schema, self.handlers, format_map, use_default=False
        )
        return self._code_cache

    def __getitem__(self, schema_id: str) -> Schema:
        """Retrieve a schema from registry"""
        return self._schema_registry[schema_id]

    def __call__(self, pyproject: T) -> T:
        """Checks a parsed ``pyproject.toml`` file (given as :obj:`typing.Mapping`)
        and raises an exception when it is not valid.
        """
        if self._cache is None:
            # Compile the validation function once and reuse it on later calls.
            compiled = FJS.compile(
                self.schema, self.handlers, dict(self.formats), use_default=False
            )
            bound = partial(compiled, custom_formats=self._format_validators)
            self._cache = typing.cast("ValidationFn", bound)

        with detailed_errors():
            self._cache(pyproject)
            # Each extra validation receives the value returned by the previous one.
            checked = pyproject
            for extra_check in self.extra_validations:
                checked = extra_check(checked)
            return checked
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc