SwissDataScienceCenter / renku-python / build 6875247711

15 Nov 2023 09:16 AM UTC. Coverage: 82.786%, down 0.05% from 82.831%.

Pull Request #3300: chore: do not always retry load tests requests
Merge e2d3269e8 into 4726f660e

25441 of 30731 relevant lines covered (82.79%), 3.12 hits per line.

Source file: /renku/core/migration/models/v3.py (97.24% of lines covered; lines not covered in this build are marked in the listing)
# Copyright Swiss Data Science Center (SDSC). A partnership between
# École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Migration models V3."""

import os

from marshmallow import EXCLUDE, post_load, pre_load

from renku.command.schema.calamus import DateTimeList, JsonLDSchema, StringList, Uri, fields, prov, rdfs, renku, schema
from renku.core.migration.models.v9 import Person as OldPerson
from renku.core.migration.models.v9 import generate_project_id, wfprov
from renku.core.migration.utils import OLD_METADATA_PATH, generate_dataset_tag_id, generate_url_id, get_datasets_path
from renku.core.util import yaml
from renku.core.util.urls import get_host
from renku.domain_model.project_context import project_context


class Base:
    """Base class for migration models."""

    def __init__(self, **kwargs):
        """Initialize an instance."""
        kwargs.setdefault("_id", None)

        for k, v in kwargs.items():
            setattr(self, k, v)


class Person(Base):
    """Person migration model."""

    affiliation = None
    email = None
    name = None

    @classmethod
    def from_repository(cls, repository):
        """Create an instance from a repository."""
        user = repository.get_user()
        instance = cls(name=user.name, email=user.email)
        instance.fix_id()
        return instance

    def __init__(self, **kwargs):
        """Initialize an instance."""
        kwargs.setdefault("_id", None)
        super().__init__(**kwargs)

    @property
    def full_identity(self):
        """Return name, email, and affiliation."""
        email = f" <{self.email}>" if self.email else ""
        affiliation = f" [{self.affiliation}]" if self.affiliation else ""
        return f"{self.name}{email}{affiliation}"

    def fix_id(self):
        """Fixes the id of a Person if it is not set."""
        if not self._id or "mailto:None" in self._id or self._id.startswith("_:"):
            hostname = get_host()
            self._id = OldPerson.generate_id(email=self.email, full_identity=self.full_identity, hostname=hostname)


class Project(Base):
    """Project migration model."""

    agent_version = None

    @classmethod
    def from_yaml(cls, path):
        """Read content from YAML file."""
        data = yaml.read_yaml(path)
        self = ProjectSchemaV3().load(data)

        if not self.creator:
            self.creator = Person.from_repository(repository=project_context.repository)

        if not self.name:
            self.name = project_context.remote.name  # not covered in this build

        if not self._id or "NULL/NULL" in self._id:
            self._id = generate_project_id(name=self.name, creator=self.creator)

        return self

    def to_yaml(self, path):
        """Write content to a YAML file."""
        from renku import __version__

        self.agent_version = __version__

        data = ProjectSchemaV3().dump(self)
        yaml.write_yaml(path=path, data=data)


class Collection(Base):
    """Collection migration model."""

    def __init__(self, **kwargs):
        kwargs.setdefault("members", [])  # not covered in this build
        super().__init__(**kwargs)  # not covered in this build


class DatasetFile(Base):
    """DatasetFile migration model."""


class DatasetTag(Base):
    """DatasetTag migration model."""

    commit = None
    name = None

    def __init__(self, **kwargs):
        """Initialize an instance."""
        super().__init__(**kwargs)

        if not self._id or self._id.startswith("_:"):
            self._id = generate_dataset_tag_id(name=self.name, commit=self.commit)


class Language(Base):
    """Language migration model."""


class Url(Base):
    """Url migration model."""

    url = None
    url_id = None
    url_str = None

    def __init__(self, **kwargs):
        """Initialize an instance."""
        super().__init__(**kwargs)

        if isinstance(self.url, dict):
            self.url_id = self.url["@id"]  # not covered in this build
        elif isinstance(self.url, str):
            self.url_str = self.url

        if not self._id or self._id.startswith("_:"):
            self._id = generate_url_id(url_str=self.url_str, url_id=self.url_id)


class Dataset(Base):
    """Dataset migration model."""

    @classmethod
    def from_yaml(cls, path, commit=None):
        """Read content from YAML file."""
        data = yaml.read_yaml(path)
        self = DatasetSchemaV3(commit=commit).load(data)
        self._metadata_path = path
        return self

    def to_yaml(self, path=None):
        """Write content to a YAML file."""
        data = DatasetSchemaV3().dump(self)
        path = path or self._metadata_path or os.path.join(self.path, OLD_METADATA_PATH)
        yaml.write_yaml(path=path, data=data)


class PersonSchemaV3(JsonLDSchema):
    """Person schema."""

    class Meta:
        """Meta class."""

        rdf_type = [prov.Person, schema.Person]
        model = Person
        unknown = EXCLUDE

    _id = fields.Id()
    name = StringList(schema.name)
    email = fields.String(schema.email, load_default=None)
    label = StringList(rdfs.label, load_default=None)
    affiliation = StringList(schema.affiliation, load_default=None)
    alternate_name = StringList(schema.alternateName, load_default=None)

    @post_load
    def make_instance(self, data, **kwargs):
        """Transform loaded dict into corresponding object."""
        instance = JsonLDSchema.make_instance(self, data, **kwargs)
        instance.fix_id()
        return instance


class ProjectSchemaV3(JsonLDSchema):
    """Project Schema."""

    class Meta:
        """Meta class."""

        rdf_type = [prov.Location, schema.Project]
        model = Project
        unknown = EXCLUDE

    _id = fields.Id(load_default=None)
    agent_version = fields.String(schema.agent, load_default="pre-0.11.0")
    name = fields.String(schema.name, load_default=None)
    created = DateTimeList(schema.dateCreated, load_default=None)
    version = StringList(schema.schemaVersion, load_default="1")
    creator = fields.Nested(schema.creator, PersonSchemaV3, load_default=None)


class CreatorMixinSchemaV3(JsonLDSchema):
    """CreatorMixin schema."""

    creators = fields.Nested(schema.creator, PersonSchemaV3, many=True)


class CommitMixinSchemaV3(JsonLDSchema):
    """CommitMixin schema."""

    _id = fields.Id(load_default=None)
    _label = fields.String(rdfs.label, load_default=None)
    _project = fields.Nested(schema.isPartOf, ProjectSchemaV3, load_default=None)
    path = fields.String(prov.atLocation, load_default=None)


class EntitySchemaV3(CommitMixinSchemaV3):
    """Entity Schema."""

    class Meta:
        """Meta class."""

        rdf_type = [prov.Entity, wfprov.Artifact]


class CollectionSchemaV3(EntitySchemaV3):
    """Collection Schema."""

    class Meta:
        """Meta class."""

        rdf_type = [prov.Collection]
        model = Collection
        unknown = EXCLUDE

    members = fields.Nested(prov.hadMember, ["DatasetFileSchemaV3", "CollectionSchemaV3"], many=True)


class DatasetFileSchemaV3(EntitySchemaV3):
    """DatasetFile schema."""

    class Meta:
        """Meta class."""

        rdf_type = schema.DigitalDocument
        model = DatasetFile
        unknown = EXCLUDE

    added = fields.DateTime(schema.dateCreated)
    based_on = fields.Nested(schema.isBasedOn, "DatasetFileSchemaV3", load_default=None)
    name = fields.String(schema.name, load_default=None)
    url = fields.String(schema.url, load_default=None)
    external = fields.Boolean(renku.external, load_default=False)


class LanguageSchemaV3(JsonLDSchema):
    """Language schema."""

    class Meta:
        """Meta class."""

        rdf_type = schema.Language
        model = Language
        unknown = EXCLUDE

    alternate_name = fields.String(schema.alternateName)
    name = fields.String(schema.name)


class DatasetTagSchemaV3(JsonLDSchema):
    """DatasetTag schema."""

    class Meta:
        """Meta class."""

        rdf_type = schema.PublicationEvent
        model = DatasetTag
        unknown = EXCLUDE

    _id = fields.Id()
    commit = fields.String(schema.location)
    created = fields.DateTime(schema.startDate, load_default=None)
    dataset = fields.String(schema.about)
    description = fields.String(schema.description)
    name = fields.String(schema.name)


class UrlSchemaV3(JsonLDSchema):
    """Url schema."""

    class Meta:
        """Meta class."""

        rdf_type = schema.URL
        model = Url
        unknown = EXCLUDE

    _id = fields.Id(load_default=None)
    url = Uri(schema.url, load_default=None)


class DatasetSchemaV3(CreatorMixinSchemaV3, EntitySchemaV3):
    """Dataset schema."""

    class Meta:
        """Meta class."""

        rdf_type = schema.Dataset
        model = Dataset
        unknown = EXCLUDE

    creators = fields.Nested(schema.creator, PersonSchemaV3, many=True, load_default=None)
    date_created = fields.DateTime(schema.dateCreated, load_default=None)
    date_published = fields.DateTime(schema.datePublished, load_default=None)
    description = fields.String(schema.description, load_default=None)
    files = fields.Nested(schema.hasPart, [DatasetFileSchemaV3, CollectionSchemaV3], many=True)
    identifier = fields.String(schema.identifier)
    in_language = fields.Nested(schema.inLanguage, LanguageSchemaV3, load_default=None)
    keywords = fields.List(schema.keywords, fields.String(), load_default=None)
    license = Uri(schema.license, load_default=None, allow_none=True)
    name = fields.String(schema.alternateName, load_default=None)
    same_as = fields.Nested(schema.sameAs, UrlSchemaV3, load_default=None)
    tags = fields.Nested(schema.subjectOf, DatasetTagSchemaV3, many=True, load_default=None)
    title = fields.String(schema.name)
    url = fields.String(schema.url, load_default=None)
    version = fields.String(schema.version, load_default=None)

    @pre_load
    def fix_files_context(self, data, **kwargs):
        """Fix DatasetFile context for _label and external fields."""
        from renku.core.migration.utils import migrate_types

        data = migrate_types(data)

        if "@context" not in data:
            return data

        context = data["@context"]
        if not isinstance(context, dict) or "files" not in context:
            return data  # not covered in this build

        context.setdefault("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
        context.setdefault("_label", "rdfs:label")

        files = data["@context"]["files"]
        if not isinstance(files, dict) or "@context" not in files:
            return data  # not covered in this build

        context = files["@context"]
        context.setdefault("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
        context.setdefault("_label", "rdfs:label")
        context.setdefault("external", "renku:external")
        context.setdefault("renku", "https://swissdatasciencecenter.github.io/renku-ontology#")

        return data


def get_project_datasets():
    """Return Dataset migration models for a project."""
    paths = get_datasets_path().rglob(OLD_METADATA_PATH)
    datasets = []
    for path in paths:
        dataset = Dataset.from_yaml(path=path)
        dataset.path = getattr(dataset, "path", None) or os.path.relpath(path.parent, project_context.path)
        datasets.append(dataset)

    return datasets
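For orientation, a minimal sketch of how the models above are typically driven, using only the entry points defined in this file (get_project_datasets, Dataset.from_yaml, Dataset.to_yaml). It assumes it runs inside an already-initialized Renku project context, since get_project_datasets reads from project_context; the helper name and the clean-up step are hypothetical and not part of renku-python.

# Illustrative sketch only, not part of v3.py: iterate over a project's V3
# dataset metadata, adjust it, and write it back in place. Assumes
# project_context has been set up by the surrounding migration machinery.
from renku.core.migration.models.v3 import get_project_datasets


def strip_dataset_names():  # hypothetical helper, for illustration only
    """Trim whitespace from dataset names and rewrite their YAML files."""
    for dataset in get_project_datasets():
        # `name` is declared on DatasetSchemaV3 with load_default=None, so the
        # attribute exists after loading but may be None for old metadata.
        if dataset.name:
            dataset.name = dataset.name.strip()
        # With no path argument, to_yaml() rewrites the file recorded in
        # _metadata_path by Dataset.from_yaml().
        dataset.to_yaml()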