• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SwissDataScienceCenter / renku-data-services / 12047437567

27 Nov 2024 09:31AM UTC coverage: 86.006% (+0.1%) from 85.882%
12047437567

Pull #545

github

web-flow
Merge c434d43ab into 7ae3af62e
Pull Request #545: feat!: add support for session secrets

216 of 222 new or added lines in 10 files covered. (97.3%)

5 existing lines in 3 files now uncovered.

14682 of 17071 relevant lines covered (86.01%)

1.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.11
/components/renku_data_services/storage/models.py
1
"""Models for cloud storage."""
2

3
from collections.abc import Generator, MutableMapping
2✔
4
from typing import Any
2✔
5
from urllib.parse import ParseResult, urlparse
2✔
6

7
from pydantic import BaseModel, Field, PrivateAttr, model_serializer, model_validator
2✔
8
from ulid import ULID
2✔
9

10
from renku_data_services import errors
2✔
11
from renku_data_services.storage.rclone import RCloneValidator
2✔
12

13

14
class RCloneConfig(BaseModel, MutableMapping):
    """A validated rclone configuration that behaves like a mutable mapping.

    The raw options live in ``config``. Item access, assignment, deletion, iteration and ``len()``
    all delegate to that dict. The config is passed to the validator after model validation and
    again after every item assignment or deletion.
    """

    config: dict[str, Any] = Field(exclude=True)

    _validator: RCloneValidator = PrivateAttr(default=RCloneValidator())

    @model_validator(mode="after")
    def check_rclone_schema(self) -> "RCloneConfig":
        """Validate that the rclone config is valid."""
        self._validator.validate(self.config)
        return self

    @model_serializer
    def serialize_model(self) -> dict[str, Any]:
        """Serialize model by returning contained dict."""
        return self.config

    def __len__(self) -> int:
        """Return the number of options in the config."""
        return len(self.config)

    def __getitem__(self, k: str) -> Any:
        """Return the value stored for option ``k``."""
        return self.config[k]

    def __setitem__(self, key: str, value: Any) -> None:
        """Set option ``key`` and re-validate the config.

        If validation fails the previous content is restored before the error is re-raised, so the
        instance never keeps a config that was rejected by the validator.
        """
        snapshot = dict(self.config)
        self.config[key] = value
        try:
            self._validator.validate(self.config)
        except Exception:
            # Restore in place so that other references to the same dict also see the rollback.
            self.config.clear()
            self.config.update(snapshot)
            raise

    def __delitem__(self, key: str) -> None:
        """Remove option ``key`` and re-validate the config.

        A missing key raises ``KeyError`` before anything is changed. If validation fails after the
        removal, the previous content is restored before the error is re-raised.
        """
        snapshot = dict(self.config)
        del self.config[key]
        try:
            self._validator.validate(self.config)
        except Exception:
            # Restore in place so that other references to the same dict also see the rollback.
            self.config.clear()
            self.config.update(snapshot)
            raise

    def __iter__(self) -> Generator[str, None, None]:  # type: ignore[override]
        """Iterate method.

        Needed for pydantic to properly serialize the object.
        """
        yield from self.config.keys()
1✔
52

53

54
class UnsavedCloudStorage(BaseModel):
2✔
55
    """Cloud Storage model."""
56

57
    project_id: str = Field(pattern=r"^[A-Z0-9]+$")
2✔
58
    name: str = Field(min_length=3)
2✔
59
    storage_type: str = Field(pattern=r"^[a-z0-9]+$")
2✔
60
    configuration: RCloneConfig
2✔
61
    readonly: bool = Field(default=True)
2✔
62

63
    source_path: str = Field()
2✔
64
    """Path inside the cloud storage.
2✔
65

66
    Note: Since rclone itself doesn't really know about buckets/containers (they're not in the schema),
67
    bucket/container/etc. has to be the first part of source path.
68
    """
69

70
    target_path: str = Field(min_length=1)
2✔
71
    """Path inside the target repository to mount/clone data to."""
2✔
72

73
    @classmethod
2✔
74
    def from_dict(cls, data: dict) -> "UnsavedCloudStorage":
2✔
75
        """Create the model from a plain dictionary."""
76

77
        if "project_id" not in data:
2✔
78
            raise errors.ValidationError(message="'project_id' not set")
×
79
        if "configuration" not in data:
2✔
80
            raise errors.ValidationError(message="'configuration' not set")
×
81

82
        if "source_path" not in data:
2✔
83
            raise errors.ValidationError(message="'source_path' not set")
×
84

85
        if "target_path" not in data:
2✔
86
            raise errors.ValidationError(message="'target_path' not set")
×
87

88
        if "type" not in data["configuration"]:
2✔
89
            raise errors.ValidationError(message="'type' not set in 'configuration'")
2✔
90

91
        return cls(
1✔
92
            project_id=data["project_id"],
93
            name=data["name"],
94
            configuration=RCloneConfig(config=data["configuration"]),
95
            storage_type=data["configuration"]["type"],
96
            source_path=data["source_path"],
97
            target_path=data["target_path"],
98
            readonly=data.get("readonly", True),
99
        )
100

101
    @classmethod
2✔
102
    def from_url(
2✔
103
        cls, storage_url: str, name: str, readonly: bool, project_id: str, target_path: str
104
    ) -> "UnsavedCloudStorage":
105
        """Get Cloud Storage/rclone config from a storage URL.
106

107
        Example:
108
            Supported URLs are:
109
            - s3://s3.<region>.amazonaws.com/<bucket>/<path>
110
            - s3://<bucket>.s3.<region>.amazonaws.com/<path>
111
            - s3://bucket/
112
            - http(s)://<endpoint>/<bucket>/<path>
113
            - (azure|az)://<account>.dfs.core.windows.net/<container>/<path>
114
            - (azure|az)://<account>.blob.core.windows.net/<container>/<path>
115
            - (azure|az)://<container>/<path>
116
        """
117
        parsed_url = urlparse(storage_url)
1✔
118

119
        if parsed_url.scheme is None:
1✔
120
            raise errors.ValidationError(message="Couldn't parse scheme of 'storage_url'")
×
121

122
        match parsed_url.scheme:
1✔
123
            case "s3":
1✔
124
                return UnsavedCloudStorage.from_s3_url(parsed_url, project_id, name, readonly, target_path)
1✔
125
            case "azure" | "az":
1✔
126
                return UnsavedCloudStorage.from_azure_url(parsed_url, project_id, name, readonly, target_path)
1✔
127
            case "http" | "https":
1✔
128
                return UnsavedCloudStorage._from_ambiguous_url(parsed_url, project_id, name, readonly, target_path)
1✔
UNCOV
129
            case _:
×
UNCOV
130
                raise errors.ValidationError(message=f"Scheme '{parsed_url.scheme}' is not supported.")
×
131

132
    @classmethod
2✔
133
    def from_s3_url(
2✔
134
        cls, storage_url: ParseResult, project_id: str, name: str, readonly: bool, target_path: str
135
    ) -> "UnsavedCloudStorage":
136
        """Get Cloud storage from an S3 URL.
137

138
        Example:
139
            Supported URLs are:
140
            - s3://s3.<region>.amazonaws.com/<bucket>/<path>
141
            - s3://<bucket>.s3.<region>.amazonaws.com/<path>
142
            - s3://bucket/
143
            - https://<endpoint>/<bucket>/<path>
144
        """
145

146
        if storage_url.hostname is None:
1✔
147
            raise errors.ValidationError(message="Storage URL must contain a host")
×
148

149
        configuration = {"type": "s3"}
1✔
150
        source_path = storage_url.path.lstrip("/")
1✔
151

152
        if storage_url.scheme == "s3":
1✔
153
            configuration["provider"] = "AWS"
1✔
154
            match storage_url.hostname.split(".", 4):
1✔
155
                case ["s3", region, "amazonaws", "com"]:
1✔
156
                    configuration["region"] = region
1✔
157
                case [bucket, "s3", region, "amazonaws", "com"]:
1✔
158
                    configuration["region"] = region
1✔
159
                    source_path = f"{bucket}{storage_url.path}"
1✔
160
                case _:
1✔
161
                    # URL like 's3://giab/' where the bucket is the
162
                    source_path = f"{storage_url.hostname}/{source_path}" if source_path else storage_url.hostname
1✔
163
        else:
164
            configuration["endpoint"] = storage_url.netloc
1✔
165

166
        return UnsavedCloudStorage(
1✔
167
            project_id=project_id,
168
            name=name,
169
            storage_type="s3",
170
            configuration=RCloneConfig(config=configuration),
171
            source_path=source_path,
172
            target_path=target_path,
173
            readonly=readonly,
174
        )
175

176
    @classmethod
2✔
177
    def from_azure_url(
2✔
178
        cls, storage_url: ParseResult, project_id: str, name: str, readonly: bool, target_path: str
179
    ) -> "UnsavedCloudStorage":
180
        """Get Cloud storage from an Azure URL.
181

182
        Example:
183
            Supported URLs are:
184
            - (azure|az)://<account>.dfs.core.windows.net/<container>/<path>
185
            - (azure|az)://<account>.blob.core.windows.net/<container>/<path>
186
            - (azure|az)://<container>/<path>
187
        """
188
        if storage_url.hostname is None:
1✔
189
            raise errors.ValidationError(message="Storage URL must contain a host")
×
190

191
        configuration = {"type": "azureblob"}
1✔
192
        source_path = storage_url.path.lstrip("/")
1✔
193

194
        match storage_url.hostname.split(".", 5):
1✔
195
            case [account, "dfs", "core", "windows", "net"] | [account, "blob", "core", "windows", "net"]:
1✔
196
                configuration["account"] = account
1✔
197
            case _:
1✔
198
                if "." in storage_url.hostname:
1✔
199
                    raise errors.ValidationError(message="Host cannot contain dots unless it's a core.windows.net URL")
×
200

201
                source_path = f"{storage_url.hostname}{storage_url.path}"
1✔
202
        return UnsavedCloudStorage(
1✔
203
            project_id=project_id,
204
            name=name,
205
            storage_type="azureblob",
206
            configuration=RCloneConfig(config=configuration),
207
            source_path=source_path,
208
            target_path=target_path,
209
            readonly=readonly,
210
        )
211

212
    @classmethod
2✔
213
    def _from_ambiguous_url(
2✔
214
        cls, storage_url: ParseResult, project_id: str, name: str, readonly: bool, target_path: str
215
    ) -> "UnsavedCloudStorage":
216
        """Get cloud storage from an ambiguous storage url."""
217
        if storage_url.hostname is None:
1✔
218
            raise errors.ValidationError(message="Storage URL must contain a host")
×
219

220
        if storage_url.hostname.endswith(".windows.net"):
1✔
221
            return UnsavedCloudStorage.from_azure_url(storage_url, project_id, name, readonly, target_path)
×
222

223
        # default to S3 for unknown URLs, since these are way more common
224
        return UnsavedCloudStorage.from_s3_url(storage_url, project_id, name, readonly, target_path)
1✔
225

226

227
class CloudStorage(UnsavedCloudStorage):
    """A cloud storage that has been saved in the database and carries its own identifier."""

    # Required field: no default value is provided.
    storage_id: ULID
2✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc