• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SwissDataScienceCenter / renku-data-services / 18123243513

30 Sep 2025 08:10AM UTC coverage: 86.702% (-0.01%) from 86.714%
18123243513

Pull #1019

github

web-flow
Merge e726c4543 into 0690bab65
Pull Request #1019: feat: Attempt to support dockerhub private images

70 of 101 new or added lines in 9 files covered. (69.31%)

106 existing lines in 6 files now uncovered.

22357 of 25786 relevant lines covered (86.7%)

1.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.88
/components/renku_data_services/notebooks/api/classes/image.py
1
"""Used to get information about docker images used in jupyter servers."""
2

3
from __future__ import annotations
2✔
4

5
import base64
2✔
6
import re
2✔
7
from dataclasses import dataclass, field
2✔
8
from enum import Enum
2✔
9
from pathlib import PurePosixPath
2✔
10
from typing import Any, Optional, Self, cast
2✔
11

12
import httpx
2✔
13
from werkzeug.datastructures import WWWAuthenticate
2✔
14

15
from renku_data_services.app_config import logging
2✔
16
from renku_data_services.errors import errors
2✔
17

18
logger = logging.getLogger(__name__)
2✔
19

20

21
class ManifestTypes(Enum):
    """Media (MIME) types of the image manifests exchanged with a registry.

    The values are sent in ``Accept`` headers and compared against the
    ``Content-Type`` of registry responses.
    """

    # Docker schema 2: manifest of a single image.
    docker_v2 = "application/vnd.docker.distribution.manifest.v2+json"
    # Docker schema 2: manifest list (one entry per platform).
    docker_v2_list = "application/vnd.docker.distribution.manifest.list.v2+json"
    # OCI v1: manifest of a single image.
    oci_v1_manifest = "application/vnd.oci.image.manifest.v1+json"
    # OCI v1: image index (one entry per platform).
    oci_v1_index = "application/vnd.oci.image.index.v1+json"
2✔
28

29

30
# Default CPU architecture used to pick an entry from a multi-platform manifest list/index.
DEFAULT_PLATFORM_ARCHITECTURE = "amd64"
2✔
31
# Default operating system used to pick an entry from a multi-platform manifest list/index.
DEFAULT_PLATFORM_OS = "linux"
2✔
32

33

34
@dataclass
class ImageRepoDockerAPI:
    """Used to query the docker image repository API.

    Please note that all image repositories use this API, not just Dockerhub.
    """

    # Registry host (optionally with port); a trailing "/" is stripped in __post_init__.
    hostname: str
    # Secret sent as the password of the Basic credentials when requesting a registry token.
    oauth2_token: Optional[str] = field(default=None, repr=False)
    # User name sent together with ``oauth2_token`` in the Basic credentials.
    username: str = "oauth2"

    # NOTE: We need to follow redirects so that we can authenticate with the image repositories properly.
    # NOTE: If we do not use default_factory to create the client here requests will fail because it can happen
    # that the client gets created in the wrong asyncio loop.
    client: httpx.AsyncClient = field(default_factory=lambda: httpx.AsyncClient(timeout=10, follow_redirects=True))
    # URL scheme used for all registry requests; an empty string falls back to "https".
    scheme: str = "https"

    def __post_init__(self) -> None:
        """Normalize the hostname and make sure a scheme is set."""
        self.hostname = self.hostname.rstrip("/")
        if self.scheme == "":
            self.scheme = "https"

    async def _get_docker_token(self, image: Image) -> Optional[str]:
        """Get an authorization token from the docker v2 API.

        An unauthenticated request for the image manifest is sent first. If the registry answers
        with a 401 and a ``Www-Authenticate`` challenge, the token is requested from the realm in
        the challenge, passing along the remaining challenge parameters. When ``oauth2_token`` is set
        it is sent with ``username`` as Basic credentials on that token request.

        Returns:
            The token provided by the API, or None if no token could be obtained (the registry
            cannot be reached, there is no usable challenge, or the token response has no token).
        """
        image_digest_url = f"{self.scheme}://{self.hostname}/v2/{image.name}/manifests/{image.tag}"
        try:
            auth_req = await self.client.get(image_digest_url)
        except httpx.ConnectError:
            return None
        if auth_req.status_code != 401 or "Www-Authenticate" not in auth_req.headers:
            # the request status code and header are not what is expected
            return None
        www_auth = WWWAuthenticate.from_header(auth_req.headers["Www-Authenticate"])
        if not www_auth:
            return None
        params = {**www_auth.parameters}
        # A challenge without a realm gives us nowhere to ask for a token.
        realm = params.pop("realm", None)
        if not realm:
            return None
        headers = {"Accept": "application/json"}
        if self.oauth2_token:
            logger.debug(f"Use credentials for user: {self.username}")
            creds = base64.b64encode(f"{self.username}:{self.oauth2_token}".encode()).decode()
            headers["Authorization"] = f"Basic {creds}"
        token_req = await self.client.get(realm, params=params, headers=headers)
        logger.debug(f"Docker token response for {self.username}: {token_req.status_code}")
        try:
            payload = token_req.json()
        except ValueError:
            # The token endpoint did not answer with JSON (e.g. an HTML error page).
            return None
        if not isinstance(payload, dict):
            return None
        # The token specification allows the token under "token" and/or "access_token".
        token = payload.get("token") or payload.get("access_token")
        # Never return str(None): callers would then send "Bearer None".
        return str(token) if token else None

    async def get_image_manifest(
        self,
        image: Image,
        platform_architecture: str = DEFAULT_PLATFORM_ARCHITECTURE,
        platform_os: str = DEFAULT_PLATFORM_OS,
    ) -> Optional[dict[str, Any]]:
        """Query the docker API to get the manifest of an image.

        If the registry answers with a manifest list/index, the entry matching
        ``platform_architecture`` and ``platform_os`` is selected and its manifest is fetched by digest.

        Returns:
            The parsed single-image manifest, or None if it cannot be retrieved, no platform matches,
            or the response is not a docker v2 / OCI v1 image manifest.

        Raises:
            errors.ValidationError: If the image hostname differs from the hostname of this API.
        """
        if image.hostname != self.hostname:
            raise errors.ValidationError(
                message=f"The image hostname {image.hostname} does not match the image repository {self.hostname}"
            )
        token = await self._get_docker_token(image)
        image_digest_url = f"{self.scheme}://{image.hostname}/v2/{image.name}/manifests/{image.tag}"
        headers = {"Accept": ManifestTypes.docker_v2.value}
        if token:
            headers["Authorization"] = f"Bearer {token}"
        res = await self.client.get(image_digest_url, headers=headers)
        # Retry with the OCI media types, one at a time, until the registry accepts one.
        for fallback in (ManifestTypes.oci_v1_manifest, ManifestTypes.oci_v1_index):
            if res.status_code == 200:
                break
            headers["Accept"] = fallback.value
            res = await self.client.get(image_digest_url, headers=headers)
        if res.status_code != 200:
            return None

        content_type = res.headers.get("Content-Type")
        if content_type in [ManifestTypes.docker_v2_list.value, ManifestTypes.oci_v1_index.value]:
            index_parsed = res.json()

            def platform_matches(manifest: dict[str, Any]) -> bool:
                platform: dict[str, Any] = manifest.get("platform", {})
                return platform.get("architecture") == platform_architecture and platform.get("os") == platform_os

            # First entry of the index that targets the requested platform, or {} if there is none.
            manifest: dict[str, Any] = next(filter(platform_matches, index_parsed.get("manifests", [])), {})
            image_digest: str | None = manifest.get("digest")
            if not manifest or not image_digest:
                return None
            image_digest_url = f"{self.scheme}://{image.hostname}/v2/{image.name}/manifests/{image_digest}"
            media_type = manifest.get("mediaType")
            # Ask for the media type announced by the index entry when it is a single-image
            # manifest type, otherwise default to the docker v2 manifest.
            headers["Accept"] = ManifestTypes.docker_v2.value
            if media_type in [
                ManifestTypes.docker_v2.value,
                ManifestTypes.oci_v1_manifest.value,
            ]:
                headers["Accept"] = media_type
            res = await self.client.get(image_digest_url, headers=headers)
            if res.status_code != 200:
                headers["Accept"] = ManifestTypes.oci_v1_manifest.value
                res = await self.client.get(image_digest_url, headers=headers)
            if res.status_code != 200:
                return None

        if res.headers.get("Content-Type") not in [
            ManifestTypes.docker_v2.value,
            ManifestTypes.oci_v1_manifest.value,
        ]:
            return None

        return cast(dict[str, Any], res.json())

    async def image_exists(self, image: Image) -> bool:
        """Check the docker repo API if the image exists."""
        return await self.image_check(image) == 200

    async def image_check(self, image: Image) -> int:
        """Check the image at the registry.

        Returns:
            The HTTP status code of a HEAD request for the image manifest.
        """
        token = await self._get_docker_token(image)
        image_digest_url = f"{self.scheme}://{image.hostname}/v2/{image.name}/manifests/{image.tag}"
        accept_media = ",".join(
            [e.value for e in [ManifestTypes.docker_v2, ManifestTypes.oci_v1_manifest, ManifestTypes.oci_v1_index]]
        )
        headers = {"Accept": accept_media}
        if token:
            headers["Authorization"] = f"Bearer {token}"

        res = await self.client.head(image_digest_url, headers=headers)
        logger.debug(f"Checked image access: {image_digest_url}: {res.status_code}")
        return res.status_code

    async def get_image_config(self, image: Image) -> Optional[dict[str, Any]]:
        """Query the docker API to get the configuration of an image.

        Returns:
            The parsed configuration blob referenced by the image manifest, or None if the manifest
            or the blob cannot be retrieved.
        """
        manifest = await self.get_image_manifest(image)
        if manifest is None:
            return None
        config_digest = manifest.get("config", {}).get("digest")
        if config_digest is None:
            return None
        token = await self._get_docker_token(image)
        headers = {"Accept": "application/json"}
        # Only authenticate when a token was obtained, like the manifest requests do.
        if token:
            headers["Authorization"] = f"Bearer {token}"
        res = await self.client.get(
            f"{self.scheme}://{image.hostname}/v2/{image.name}/blobs/{config_digest}",
            headers=headers,
        )
        if res.status_code != 200:
            return None
        return cast(dict[str, Any], res.json())

    async def image_workdir(self, image: Image) -> Optional[PurePosixPath]:
        """Query the docker API to get the workdir of an image.

        Returns:
            The working directory from the image configuration ("/" when it is unset or empty),
            or None if the image configuration cannot be retrieved.
        """
        config = await self.get_image_config(image)
        if config is None:
            return None
        nested_config = config.get("config", {})
        if nested_config is None:
            return None
        # A missing, null or empty "WorkingDir" all mean the root directory.
        workdir = nested_config.get("WorkingDir") or "/"
        return PurePosixPath(workdir)

    def with_oauth2_token(self, oauth2_token: str, user: str | None = None) -> ImageRepoDockerAPI:
        """Return a docker API instance with the token as authentication.

        The current user name is kept unless ``user`` is given.
        """
        return ImageRepoDockerAPI(
            hostname=self.hostname, scheme=self.scheme, oauth2_token=oauth2_token, username=user or self.username
        )

    def with_user_name(self, user: str) -> ImageRepoDockerAPI:
        """Return a docker api instance with the given user set."""
        return ImageRepoDockerAPI(
            hostname=self.hostname, scheme=self.scheme, oauth2_token=self.oauth2_token, username=user
        )

    def maybe_with_oauth2_token(self, token_hostname: str | None, oauth2_token: str | None) -> ImageRepoDockerAPI:
        """Return a docker API instance with the token as authentication.

        The token is used only if the image hostname matches the token hostname.
        Otherwise this instance is returned unchanged.
        """
        if isinstance(token_hostname, str) and self.hostname == token_hostname and oauth2_token:
            # Keep the configured scheme so that non-https registries keep working;
            # the user name is left at its default, as before.
            return ImageRepoDockerAPI(hostname=self.hostname, oauth2_token=oauth2_token, scheme=self.scheme)
        return self
1✔
219

220

221
@dataclass
2✔
222
class Image:
2✔
223
    """Representation of a docker image."""
224

225
    hostname: str
2✔
226
    name: str
2✔
227
    tag: str
2✔
228

229
    @classmethod
2✔
230
    def from_path(cls, path: str) -> Self:
2✔
231
        """Create an image from a path like 'nginx:1.28'."""
232

233
        def build_re(*parts: str) -> re.Pattern:
2✔
234
            """Assemble the regex."""
235
            return re.compile(r"^" + r"".join(parts) + r"$")
2✔
236

237
        hostname = r"(?P<hostname>(?<=^)[a-zA-Z0-9_\-]{1,}\.[a-zA-Z0-9\._\-:]{1,}(?=\/))"
2✔
238
        docker_username = r"(?P<username>(?<=^)[a-zA-Z0-9]{1,}(?=\/))"
2✔
239
        username = r"(?P<username>(?<=\/)[a-zA-Z0-9\._\-]{1,}(?=\/))"
2✔
240
        docker_image = r"(?P<image>(?:(?<=\/)|(?<=^))[a-zA-Z0-9\._\-]{1,}(?:(?=:)|(?=@)|(?=$)))"
2✔
241
        image = r"(?P<image>(?:(?<=\/)|(?<=^))[a-zA-Z0-9\._\-\/]{1,}(?:(?=:)|(?=@)|(?=$)))"
2✔
242
        sha = r"(?P<tag>(?<=@)[a-zA-Z0-9\._\-:]{1,}(?=$))"
2✔
243
        tag = r"(?P<tag>(?<=:)[a-zA-Z0-9\._\-]{1,}(?=$))"
2✔
244

245
        # a list of tuples with (regex, defaults to fill in case of match)
246
        regexes: list[tuple[re.Pattern, dict[str, str]]] = [
2✔
247
            # nginx
248
            (
249
                build_re(docker_image),
250
                {
251
                    "hostname": "registry-1.docker.io",
252
                    "username": "library",
253
                    "tag": "latest",
254
                },
255
            ),
256
            # username/image
257
            (
258
                build_re(docker_username, r"\/", docker_image),
259
                {"hostname": "registry-1.docker.io", "tag": "latest"},
260
            ),
261
            # nginx:1.28
262
            (
263
                build_re(docker_image, r":", tag),
264
                {"hostname": "registry-1.docker.io", "username": "library"},
265
            ),
266
            # username/image:1.0.0
267
            (
268
                build_re(docker_username, r"\/", docker_image, r":", tag),
269
                {"hostname": "registry-1.docker.io"},
270
            ),
271
            # nginx@sha256:24235rt2rewg345ferwf
272
            (
273
                build_re(docker_image, r"@", sha),
274
                {"hostname": "registry-1.docker.io", "username": "library"},
275
            ),
276
            # username/image@sha256:fdsaf345tre3412t1413r
277
            (
278
                build_re(docker_username, r"\/", docker_image, r"@", sha),
279
                {"hostname": "registry-1.docker.io"},
280
            ),
281
            # gitlab.com/username/project
282
            # gitlab.com/username/project/image/subimage
283
            (build_re(hostname, r"\/", username, r"\/", image), {"tag": "latest"}),
284
            # gitlab.com/username/project:1.2.3
285
            # gitlab.com/username/project/image/subimage:1.2.3
286
            (build_re(hostname, r"\/", username, r"\/", image, r":", tag), {}),
287
            # gitlab.com/username/project@sha256:324fet13t4
288
            # gitlab.com/username/project/image/subimage@sha256:324fet13t4
289
            (build_re(hostname, r"\/", username, r"\/", image, r"@", sha), {}),
290
        ]
291

292
        matches = []
2✔
293
        for regex, fill in regexes:
2✔
294
            match = regex.match(path)
2✔
295
            if match is not None:
2✔
296
                match_dict = match.groupdict()
2✔
297
                match_dict.update(fill)
2✔
298
                # lump username in image name - not required to have it separate
299
                # however separating these in the regex makes it easier to match
300
                match_dict["image"] = match_dict["username"] + "/" + match_dict["image"]
2✔
301
                match_dict.pop("username")
2✔
302
                matches.append(match_dict)
2✔
303
        if len(matches) == 1:
2✔
304
            return cls(matches[0]["hostname"], matches[0]["image"], matches[0]["tag"])
2✔
305
        elif len(matches) > 1:
×
306
            raise errors.ValidationError(message=f"Cannot parse the image {path}, too many interpretations {matches}")
×
307
        else:
308
            raise errors.ValidationError(message=f"Cannot parse the image {path}")
×
309

310
    def repo_api(self) -> ImageRepoDockerAPI:
2✔
311
        """Get the docker API from the image."""
312
        return ImageRepoDockerAPI(self.hostname)
1✔
313

314
    def __str__(self) -> str:
2✔
315
        return f"{self.hostname}/{self.name}:{self.tag}"
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc