• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IGVF-DACC / igvfd / #5540

28 Jun 2024 04:23PM UTC coverage: 88.264% (+0.006%) from 88.258%
#5540

Pull #970

coveralls-python

ian-whaling
fixed audit & tests
Pull Request #970: IGVF-1756-CRISPR-NTRs-audit

32 of 32 new or added lines in 2 files covered. (100.0%)

1 existing line in 1 file now uncovered.

6295 of 7132 relevant lines covered (88.26%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.74
/src/igvfd/types/file_set.py
1
from snovault import (
1✔
2
    abstract_collection,
3
    calculated_property,
4
    collection,
5
    load_schema,
6
)
7
from snovault.util import Path
1✔
8

9
from .base import (
1✔
10
    Item,
11
    paths_filtered_by_status
12
)
13

14
from datetime import datetime
1✔
15

16

17
def get_donors_from_samples(request, samples):
    """Collect the unique donor paths across the given sample paths.

    Each sample is embedded as a plain object and its ``donors`` list
    (absent entries tolerated) is merged into one de-duplicated list.
    The order of the returned list is unspecified (set iteration order).
    """
    seen = set()
    for sample_path in samples:
        sample_object = request.embed(sample_path, '@@object')
        seen.update(sample_object.get('donors', []))
    return list(seen)
22

23

24
@abstract_collection(
    name='file-sets',
    unique_key='accession',
    properties={
        'title': 'File Sets',
        'description': 'Listing of file sets',
    }
)
class FileSet(Item):
    """Abstract base item for every file-set collection (analysis, curated,
    measurement, model, auxiliary, prediction, construct library sets).

    Provides the shared rev links, embedding configuration, audit
    inheritance, status-propagation lists, and the calculated properties
    common to all concrete subclasses defined below.
    """
    item_type = 'file_set'
    base_types = ['FileSet'] + Item.base_types
    # File sets are addressed by accession (not uuid) in URLs.
    name_key = 'accession'
    schema = load_schema('igvfd:schemas/file_set.json')
    # Reverse links: items elsewhere whose named property points back at
    # this file set; surfaced via the calculated properties below.
    rev = {
        'files': ('File', 'file_set'),
        'control_for': ('FileSet', 'control_file_sets'),
        'input_file_set_for': ('FileSet', 'input_file_sets')
    }
    # Linked objects embedded (with only the listed fields) into search
    # and detail responses for every file-set subtype.
    embedded_with_frame = [
        Path('award.contact_pi', include=['@id', 'contact_pi', 'component', 'title']),
        Path('lab', include=['@id', 'title']),
        Path('submitted_by', include=['@id', 'title']),
        Path('files', include=['@id', 'accession', 'aliases', 'content_type',
             'file_format', 'file_size', 'href', 's3_uri', 'submitted_file_name',
                               'creation_timestamp', 'sequencing_platform']),
        Path('control_for', include=['@id', 'accession', 'aliases']),
        Path('donors', include=['@id', 'accession', 'aliases', 'sex', 'status', 'taxa']),
        Path('samples.sample_terms', include=[
            '@id',
            '@type',
            'accession',
            'aliases',
            'treatments',
            'cell_fate_change_treatments',
            'classification',
            'construct_library_sets',
            'disease_terms',
            'modifications',
            'sample_terms',
            'status',
            'summary',
            'targeted_sample_term',
            'taxa',
            'term_name',
        ]),
        Path('samples.disease_terms', include=['@id', 'term_name']),
        Path('samples.targeted_sample_term', include=['@id', 'term_name']),
    ]

    # Linked objects whose audits are rolled up onto this file set.
    audit_inherit = [
        'award',
        'lab',
        'files',
        'documents',
        'samples',
        'samples.sample_terms',
        'samples.disease_terms',
        'samples.treatments',
        'samples.modifications',
        'donors',
    ]

    # Linked objects whose status is promoted when this item is released.
    set_status_up = [
        'documents',
        'files',
        'input_file_sets',
        'samples'
    ]
    # No linked objects are demoted with this item.
    set_status_down = [
    ]

    @calculated_property(schema={
        'title': 'Files',
        'type': 'array',
        'description': 'The files associated with this file set.',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'File',
            'type': ['string', 'object'],
            'linkFrom': 'File.file_set',
        },
        'notSubmittable': True
    })
    def files(self, request, files):
        """Files rev-linked to this set, filtered to visible statuses."""
        return paths_filtered_by_status(request, files)

    @calculated_property(schema={
        'title': 'File Sets Controlled By This File Set',
        'type': 'array',
        'description': 'The file sets for which this file set is a control.',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'File Set Controlled By This File Set',
            'type': ['string', 'object'],
            'linkFrom': 'FileSet.control_file_sets',
        },
        'notSubmittable': True
    })
    def control_for(self, request, control_for):
        """File sets that list this set as a control, status-filtered."""
        return paths_filtered_by_status(request, control_for)

    @calculated_property(schema={
        'title': 'Submitted Files Timestamp',
        'description': 'The timestamp the first file object in the file_set or associated auxiliary sets was created.',
        'comment': 'Do not submit. The timestamp is automatically calculated.',
        'type': 'string',
        'format': 'date-time',
        'notSubmittable': True
    })
    def submitted_files_timestamp(self, request, files, auxiliary_sets=[]):
        """Earliest ``creation_timestamp`` among this set's files and the
        files of any associated auxiliary sets; None when no file has one.
        """
        # NOTE: the mutable default is never mutated here, so the shared
        # default list is harmless.
        timestamps = set()
        files_to_traverse = []
        if files:
            files_to_traverse.extend(files)
        if auxiliary_sets:
            for auxiliary_set in auxiliary_sets:
                # Only the calculated 'files' property is needed from the
                # auxiliary set, so request just that field.
                aux_set_object = request.embed(auxiliary_set, '@@object_with_select_calculated_properties?field=files')
                if 'files' in aux_set_object:
                    files_to_traverse.extend(aux_set_object['files'])
        for current_file_path in files_to_traverse:
            file_object = request.embed(current_file_path, '@@object?skip_calculated=true')
            timestamp = file_object.get('creation_timestamp', None)
            if timestamp:
                timestamps.add(timestamp)
        if timestamps:
            # Timestamps are ISO 8601 with microseconds and a UTC offset;
            # sort chronologically and return the earliest.
            res = sorted(timestamps, key=lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S.%f%z'))
            return res[0]

    @calculated_property(schema={
        'title': 'Input File Set For',
        'description': 'The file sets that use this file set as an input.',
        'type': 'array',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'Input File Set For',
            'type': ['string', 'object'],
            'linkFrom': 'FileSet.input_file_sets',
        },
        'notSubmittable': True
    })
    def input_file_set_for(self, request, input_file_set_for):
        """File sets consuming this set as input, status-filtered."""
        return paths_filtered_by_status(request, input_file_set_for)
169

170

171
@collection(
    name='analysis-sets',
    unique_key='accession',
    properties={
        'title': 'Analysis Sets',
        'description': 'Listing of analysis sets',
    }
)
class AnalysisSet(FileSet):
    """File set grouping the files produced by analyzing input file sets."""
    item_type = 'analysis_set'
    schema = load_schema('igvfd:schemas/analysis_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('input_file_sets', include=['@id', 'accession', 'aliases', 'file_set_type'])
    ]
    audit_inherit = FileSet.audit_inherit
    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, input_file_sets=[]):
        """Build a one-line summary: the file_set_type plus the assay terms
        (or, failing that, the file set types) of the transitive inputs.

        Nested analysis sets are expanded into their own inputs;
        measurement sets contribute an assay title; any other input set
        contributes its ``file_set_type``.
        """
        # NOTE: input_file_sets is never mutated, so the shared mutable
        # default is harmless; the signature is kept for the framework.
        sentence = f'{file_set_type}'
        inspected_filesets = set()
        assay_terms = set()
        fileset_types = set()
        if input_file_sets:
            # Worklist traversal; the set de-duplicates pending candidates.
            filesets_to_inspect = set(input_file_sets)
            while filesets_to_inspect:
                input_fileset = filesets_to_inspect.pop()
                if input_fileset in inspected_filesets:
                    continue
                inspected_filesets.add(input_fileset)
                fileset_object = request.embed(input_fileset, '@@object?skip_calculated=true')
                if input_fileset.startswith('/measurement-sets/'):
                    # Prefer the curated assay title; fall back to the
                    # ontology term name of the assay.
                    if 'preferred_assay_title' in fileset_object:
                        assay_terms.add(fileset_object['preferred_assay_title'])
                    else:
                        assay_terms.add(request.embed(fileset_object['assay_term'],
                                        '@@object?skip_calculated=true')['term_name'])
                elif not input_fileset.startswith('/analysis-sets/'):
                    fileset_types.add(fileset_object['file_set_type'])
                elif fileset_object.get('input_file_sets', False):
                    # Nested analysis set: descend into its own inputs.
                    for candidate_fileset in fileset_object.get('input_file_sets'):
                        if candidate_fileset not in inspected_filesets:
                            filesets_to_inspect.add(candidate_fileset)
        # Assay terms win over generic file set types when both exist.
        terms = sorted(assay_terms) or sorted(fileset_types)
        if terms:
            sentence += f" of {', '.join(terms)} data"
        else:
            sentence += ' of data'
        return sentence

    @calculated_property(
        schema={
            'title': 'Assay Titles',
            'description': 'Title(s) of assays that produced data analyzed in the analysis set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Assay Title',
                'description': 'Title of assay that produced data analyzed in the analysis set.',
                'type': 'string'
            },
            'notSubmittable': True,
        }
    )
    def assay_titles(self, request, input_file_sets=None):
        """Unique assay titles of the direct measurement-set inputs.

        Returns None (property omitted) when input_file_sets is absent.
        """
        assay_title = set()
        if input_file_sets is not None:
            for fileset in input_file_sets:
                file_set_object = request.embed(fileset, '@@object')
                is_measurement_set = 'MeasurementSet' in file_set_object.get('@type')
                if is_measurement_set and file_set_object.get('preferred_assay_title'):
                    assay_title.add(file_set_object.get('preferred_assay_title'))
                elif is_measurement_set:
                    assay = request.embed(file_set_object['assay_term'], '@@object')
                    assay_title.add(assay.get('term_name'))
            return list(assay_title)

    @calculated_property(
        condition='samples',
        schema={
            'title': 'Donors',
            'description': 'The donors of the samples associated with this analysis set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Donor',
                'description': 'Donor of a sample associated with this analysis set.',
                'type': 'string',
                'linkTo': 'Donor'
            },
            'notSubmittable': True,
        }
    )
    def donors(self, request, samples=None):
        """Unique donors across this set's samples."""
        return get_donors_from_samples(request, samples)
279

280

281
@collection(
    name='curated-sets',
    unique_key='accession',
    properties={
        'title': 'Curated Sets',
        'description': 'Listing of curated sets',
    }
)
class CuratedSet(FileSet):
    """File set for externally curated reference data (genomes, annotations)."""
    item_type = 'curated_set'
    schema = load_schema('igvfd:schemas/curated_set.json')
    embedded_with_frame = FileSet.embedded_with_frame
    audit_inherit = FileSet.audit_inherit
    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []

    @staticmethod
    def _collect_file_property(request, files, prop):
        """Gather the distinct values of *prop* across the given file paths.

        Returns a sorted list, or None when no file carries the property.
        Shared by ``assemblies`` and ``transcriptome_annotations``, which
        were previously duplicated implementations.
        """
        values = set()
        for current_file_path in files:
            file_object = request.embed(current_file_path, '@@object?skip_calculated=true')
            if file_object.get(prop):
                values.add(file_object.get(prop))
        if values:
            return sorted(values)

    @calculated_property(
        define=True,
        schema={
            'title': 'Assemblies',
            'description': 'The genome assemblies to which the referencing files in the file set are utilizing (e.g., GRCh38).',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Assembly',
                'type': 'string'
            },
            'notSubmittable': True,
        }
    )
    def assemblies(self, request, files=None):
        """Sorted unique ``assembly`` values across this set's files."""
        if files:
            return self._collect_file_property(request, files, 'assembly')

    @calculated_property(
        define=True,
        schema={
            'title': 'Transcriptome Annotations',
            'description': 'The annotation versions of the reference resource.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Transcriptome Annotation',
                'type': 'string'
            },
            'notSubmittable': True,
        }
    )
    def transcriptome_annotations(self, request, files=None):
        """Sorted unique ``transcriptome_annotation`` values across files."""
        if files:
            return self._collect_file_property(request, files, 'transcriptome_annotation')

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, file_set_type, assemblies=None, transcriptome_annotations=None, taxa=None):
        """Space-joined summary: taxa, assemblies, annotations, then type."""
        parts = []
        if taxa:
            parts.append(taxa)
        if assemblies:
            parts.append(' '.join(assemblies))
        if transcriptome_annotations:
            parts.append(' '.join(transcriptome_annotations))
        parts.append(file_set_type)
        return ' '.join(parts)
366

367

368
@collection(
    name='measurement-sets',
    unique_key='accession',
    properties={
        'title': 'Measurement Sets',
        'description': 'Listing of measurement sets',
    })
class MeasurementSet(FileSet):
    """File set for the raw data produced by a single assay."""
    item_type = 'measurement_set'
    schema = load_schema('igvfd:schemas/measurement_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('assay_term', include=['@id', 'term_name']),
        Path('library_construction_platform', include=['@id', 'term_name']),
        Path('control_file_sets', include=['@id', 'accession', 'aliases']),
        Path('related_multiome_datasets', include=['@id', 'accession']),
        Path('auxiliary_sets', include=['@id', 'accession', 'aliases']),
        Path('samples.treatments', include=['@id', 'purpose', 'treatment_type', 'summary']),
        Path('samples.cell_fate_change_treatments', include=['@id', 'purpose', 'treatment_type', 'summary']),
        Path('samples.disease_terms', include=['@id', 'term_name']),
        Path('samples.modifications', include=['@id', 'modality']),
        Path('samples.construct_library_sets', include=['@id', 'accession', 'summary']),
        Path('files.sequencing_platform', include=['@id', 'term_name']),
    ]

    audit_inherit = FileSet.audit_inherit + [
        'auxiliary_sets',
        'library_construction_platform',
        'assay_term',
    ]

    set_status_up = FileSet.set_status_up + [
        'assay_term',
        'library_construction_platform',
    ]
    set_status_down = FileSet.set_status_down + []

    @calculated_property(
        condition='multiome_size',
        schema={
            'title': 'Related Multiome Datasets',
            'description': 'Related datasets included in the multiome experiment this measurement set is a part of.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Related Multiome Dataset',
                'description': 'Related dataset included in the multiome experiment this measurement set is a part of.',
                'type': 'string',
                'linkTo': 'MeasurementSet'
            },
            'notSubmittable': True,
        }
    )
    def related_multiome_datasets(self, request, samples=None):
        """Other measurement sets sharing a sample with this one.

        Returns None (property omitted) when there are no samples.
        """
        object_id = self.jsonld_id(request)
        if samples:
            related_datasets = []
            for sample in samples:
                sample_object = request.embed(sample, '@@object')
                for file_set_id in sample_object.get('file_sets') or []:
                    # Keep only other measurement sets, preserving first-seen
                    # order without duplicates.
                    if (file_set_id.startswith('/measurement-sets/')
                            and file_set_id != object_id
                            and file_set_id not in related_datasets):
                        related_datasets.append(file_set_id)
            return related_datasets

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, assay_term, preferred_assay_title=None, samples=None):
        """Compose a phrase from modality, assay, preferred title, and any
        construct library set summaries, e.g.
        'CRISPR interference screen (CRISPRi FlowFISH) integrating a ...'.
        """
        assay = request.embed(assay_term)['term_name']
        modality_set = set()
        cls_set = set()
        cls_phrase = ''
        modality_phrase = ''
        assay_phrase = ''
        preferred_title_phrase = ''

        if samples:
            for sample in samples:
                sample_object = request.embed(sample, '@@object')
                for modification in sample_object.get('modifications') or []:
                    modality_set.add(request.embed(modification)['modality'])
                for construct_library in sample_object.get('construct_library_sets') or []:
                    cls_set.add(request.embed(construct_library)['summary'])
        if preferred_assay_title:
            preferred_title_phrase = f' ({preferred_assay_title})'
        if len(modality_set) > 1:
            modality_phrase = 'mixed'
            assay_phrase = f' {assay}'
        if len(modality_set) == 1:
            # Rebind to the sole modality string; if that string is empty,
            # len() == 0 below still overrides the phrase, preserving the
            # long-standing fall-through behavior.
            modality_set = ''.join(modality_set)
            if assay == 'CRISPR screen':
                assay_phrase = f'CRISPR {modality_set} screen'
            else:
                modality_phrase = ''
                assay_phrase = assay
        if len(modality_set) == 0:
            assay_phrase = assay
        if len(cls_set) > 0:
            cls_phrases = []
            for summary in cls_set:
                # Fix: pick the article from the case-folded first character
                # so capitalized summaries (e.g. 'Expression ...') get 'an'.
                article = 'an' if summary[:1].lower() in ('a', 'e', 'i', 'o', 'u') else 'a'
                cls_phrases.append(f'{article} {summary[0].lower()}{summary[1:]}')
            if len(cls_phrases) == 1:
                cls_phrase = cls_phrases[0]
            elif len(cls_phrases) == 2:
                cls_phrase = ' and '.join(cls_phrases)
            else:
                # Oxford-comma join for three or more.
                cls_phrase = ', '.join(cls_phrases[:-1]) + ', and ' + cls_phrases[-1]
            cls_phrase = f' integrating {cls_phrase}'
        return ''.join([modality_phrase, assay_phrase, preferred_title_phrase, cls_phrase])

    @calculated_property(
        condition='samples',
        schema={
            'title': 'Donors',
            'description': 'The donors of the samples associated with this measurement set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Donor',
                'description': 'Donor of a sample associated with this measurement set.',
                'type': 'string',
                'linkTo': 'Donor'
            },
            'notSubmittable': True,
        }
    )
    def donors(self, request, samples=None):
        """Unique donors across this set's samples."""
        return get_donors_from_samples(request, samples)
521

522

523
@collection(
    name='model-sets',
    unique_key='accession',
    properties={
        'title': 'Model Sets',
        'description': 'Listing of model sets',
    }
)
class ModelSet(FileSet):
    """File set for computational model files.

    Declarative configuration only; no calculated properties beyond
    those inherited from FileSet.
    """
    item_type = 'model_set'
    schema = load_schema('igvfd:schemas/model_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('input_file_sets', include=['@id', 'accession', 'aliases'])
    ]
    audit_inherit = FileSet.audit_inherit
    # Releasing a model set also promotes its software_version.
    set_status_up = FileSet.set_status_up + [
        'software_version'
    ]
    set_status_down = FileSet.set_status_down + []
542

543

544
@collection(
    name='auxiliary-sets',
    unique_key='accession',
    properties={
        'title': 'Auxiliary Sets',
        'description': 'Listing of auxiliary sets',
    })
class AuxiliarySet(FileSet):
    """File set for data auxiliary to one or more measurement sets."""
    item_type = 'auxiliary_set'
    schema = load_schema('igvfd:schemas/auxiliary_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('measurement_sets', include=['@id', 'accession', 'aliases']),
    ]
    audit_inherit = FileSet.audit_inherit
    # Extend the base rev links with the back-link from measurement sets.
    rev = {**FileSet.rev, 'measurement_sets': ('MeasurementSet', 'auxiliary_sets')}
    set_status_up = FileSet.set_status_up + [
        'library_construction_platform'
    ]
    set_status_down = FileSet.set_status_down + []

    @calculated_property(schema={
        'title': 'Measurement Sets',
        'description': 'The measurement sets that link to this auxiliary set.',
        'type': 'array',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'Measurement Set',
            'type': ['string', 'object'],
            'linkFrom': 'MeasurementSet.auxiliary_sets',
        },
        'notSubmittable': True
    })
    def measurement_sets(self, request, measurement_sets):
        """Measurement sets referencing this auxiliary set, status-filtered."""
        return paths_filtered_by_status(request, measurement_sets)

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, measurement_sets=None):
        """Summary: the file_set_type plus the summaries of up to two
        linked measurement sets, with a '... and N more' tail beyond that.
        """
        if not measurement_sets:
            return f'{file_set_type}'
        summaries = []
        for measurement_set_path in measurement_sets[:2]:
            if measurement_set_path:
                ms_object = request.embed(measurement_set_path, '@@object')
                summaries.append(ms_object.get('summary'))
        extra = len(measurement_sets) - 2
        if extra > 0:
            suffix = 's' if extra != 1 else ''
            summaries.append(f'... and {extra} more measurement set{suffix}')
        return f'{file_set_type} for {", ".join(summaries)}'

    @calculated_property(
        condition='samples',
        schema={
            'title': 'Donors',
            'description': 'The donors of the samples associated with this auxiliary set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Donor',
                'description': 'Donor of a sample associated with this auxiliary set.',
                'type': 'string',
                'linkTo': 'Donor'
            },
            'notSubmittable': True,
        }
    )
    def donors(self, request, samples=None):
        """Unique donors across this set's samples."""
        return get_donors_from_samples(request, samples)
616

617

618
@collection(
    name='prediction-sets',
    unique_key='accession',
    properties={
        'title': 'Prediction Sets',
        'description': 'Listing of prediction sets',
    })
class PredictionSet(FileSet):
    """File set for functional-effect prediction data.

    Declarative configuration only; no calculated properties beyond
    those inherited from FileSet.
    """
    item_type = 'prediction_set'
    schema = load_schema('igvfd:schemas/prediction_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('samples.construct_library_sets', include=['@id', 'accession', 'summary']),
        Path('large_scale_gene_list', include=['@id', 'accession', 'aliases']),
        Path('large_scale_loci_list', include=['@id', 'accession', 'aliases']),
        Path('small_scale_gene_list', include=['@id', 'geneid', 'symbol', 'name', 'synonyms']),
    ]
    audit_inherit = FileSet.audit_inherit
    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []
637

638

639
@collection(
    name='construct-library-sets',
    unique_key='accession',
    properties={
        'title': 'Construct Library Sets',
        'description': 'Listing of construct library sets',
    })
class ConstructLibrarySet(FileSet):
    """File set describing a library of constructs (guides, expression
    vectors, reporters) applied to samples."""
    item_type = 'construct_library_set'
    schema = load_schema('igvfd:schemas/construct_library_set.json')
    # This subtype defines its own (narrower) embedding, rather than
    # extending FileSet.embedded_with_frame.
    embedded_with_frame = [
        Path('award', include=['@id', 'component']),
        Path('lab', include=['@id', 'title']),
        Path('submitted_by', include=['@id', 'title']),
        Path('files', include=['@id', 'accession', 'aliases', 'content_type', 'file_format']),
        Path('control_for', include=['@id', 'accession', 'aliases']),
        Path('associated_phenotypes', include=['@id', 'term_id', 'term_name']),
        Path('small_scale_gene_list', include=['@id', 'geneid', 'symbol', 'name', 'synonyms']),
        Path('applied_to_samples', include=['@id', '@type', 'accession',
             'aliases', 'disease_terms', 'sample_terms', 'status', 'summary']),
        Path('applied_to_samples.disease_terms', include=['@id', 'term_name']),
        Path('applied_to_samples.sample_terms', include=['@id', 'term_name']),
        Path('large_scale_gene_list', include=['@id', 'accession', 'aliases']),
        Path('large_scale_loci_list', include=['@id', 'accession', 'aliases']),
        Path('orf_list', include=['@id', 'orf_id', 'gene', 'aliases']),
    ]
    audit_inherit = [
        'award',
        'lab',
        'files',
        'documents',
    ]

    rev = FileSet.rev | {'applied_to_samples': ('Sample', 'construct_library_sets')}

    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []

    @calculated_property(schema={
        'title': 'Applied to Samples',
        'description': 'The samples that link to this construct library set.',
        'type': 'array',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'Applied to Sample',
            'type': ['string', 'object'],
            'linkFrom': 'Sample.construct_library_sets',
        },
        'notSubmittable': True
    })
    def applied_to_samples(self, request, applied_to_samples):
        """Samples referencing this construct library set, status-filtered."""
        return paths_filtered_by_status(request, applied_to_samples)

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, scope, selection_criteria, small_scale_gene_list=None, large_scale_gene_list=None, guide_type=None,
                small_scale_loci_list=None, large_scale_loci_list=None, exon=None, tile=None, orf_list=None, associated_phenotypes=None):
        """Compose a summary like
        'Guide (sgRNA) library targeting genes in TP53 associated with ...'.

        The target phrase depends on ``scope``; the library-type label on
        ``file_set_type`` (and ``guide_type``); phenotypes and selection
        criteria are appended when present.
        """
        library_type = ''
        target_phrase = ''
        pheno_terms = []
        pheno_phrase = ''
        preposition = ''
        # Copy so removing 'genes' below never mutates the stored property.
        criteria = list(selection_criteria)

        if scope == 'loci':
            if small_scale_loci_list and len(small_scale_loci_list) > 1:
                target_phrase = f' {len(small_scale_loci_list)} genomic loci'
            elif large_scale_loci_list:
                target_phrase = ' many genomic loci'
            else:
                target_phrase = ' a genomic locus'
        if scope == 'genes':
            if small_scale_gene_list and len(small_scale_gene_list) > 1:
                target_phrase = f' {len(small_scale_gene_list)} genes'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                gene_name = gene_object.get('symbol')
                target_phrase = f' {gene_name}'
            elif large_scale_gene_list:
                target_phrase = ' many genes'
        if scope == 'exon':
            if small_scale_gene_list and len(small_scale_gene_list) > 1:
                target_phrase = f' exon {exon} of multiple genes'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                gene_name = gene_object.get('symbol')
                target_phrase = f' exon {exon} of {gene_name}'
        if scope == 'interactors':
            if orf_list and len(orf_list) > 1:
                target_phrase = f' {len(orf_list)} open reading frames'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                # NOTE(review): assumes orf_list is non-empty whenever a
                # single gene is listed for this scope — confirm the schema
                # enforces that, otherwise orf_list[0] can raise here.
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                orf_object = request.embed(orf_list[0], '@@object?skip_calculated=true')
                gene_name = gene_object.get('symbol')
                orf_id = orf_object.get('orf_id')
                target_phrase = f' open reading frame {orf_id} of {gene_name}'
        if scope == 'tile':
            tile_id = tile['tile_id']
            start = tile['tile_start']
            end = tile['tile_end']
            if small_scale_gene_list and len(small_scale_gene_list) > 1:
                target_phrase = f' tile {tile_id} of multiple genes'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                gene_name = gene_object.get('symbol')
                target_phrase = f' tile {tile_id} of {gene_name} (AA {start}-{end})'
        if scope == 'genome-wide':
            target_phrase = ' genome-wide'

        if file_set_type == 'expression vector library':
            library_type = 'Expression vector library'
        if file_set_type == 'guide library':
            if guide_type == 'sgRNA':
                library_type = 'Guide (sgRNA) library'
            if guide_type == 'pgRNA':
                library_type = 'Guide (pgRNA) library'
        if file_set_type == 'reporter library':
            library_type = 'Reporter library'

        if associated_phenotypes:
            for pheno in associated_phenotypes:
                pheno_object = request.embed(pheno, '@@object?skip_calculated=true')
                pheno_terms.append(pheno_object.get('term_name'))
            if len(pheno_terms) in [1, 2]:
                phenos = ', '.join(pheno_terms)
                pheno_phrase = f' associated with {phenos}'
            else:
                pheno_phrase = f' associated with {len(pheno_terms)} phenotypes'

        if file_set_type == 'expression vector library':
            # Expression vector libraries read '... of <target> (criteria)';
            # the implicit 'genes' criterion is dropped as redundant.
            if 'genes' in criteria:
                criteria.remove('genes')
            selections = ', '.join(criteria)
            if selections:
                selections = f' ({selections})'
            preposition = ' of'
            return f'{library_type}{preposition}{target_phrase}{selections}{pheno_phrase}'
        else:
            selections = ', '.join(criteria)
            # 'genome-wide' already reads as a location, so no 'in'.
            preposition = '' if scope == 'genome-wide' else ' in'
            return f'{library_type} targeting {selections}{preposition}{target_phrase}{pheno_phrase}'
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc