• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IGVF-DACC / igvfd / #8282

22 May 2025 06:21PM UTC coverage: 90.935% (+0.004%) from 90.931%
#8282

Pull #1543

coveralls-python

jenjou
Mappings
Pull Request #1543: IGVF-2319-aset-summary-cls-only

15 of 15 new or added lines in 1 file covered. (100.0%)

1 existing line in 1 file now uncovered.

8647 of 9509 relevant lines covered (90.93%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.41
/src/igvfd/types/file_set.py
1
from snovault import (
1✔
2
    abstract_collection,
3
    calculated_property,
4
    collection,
5
    load_schema,
6
)
7
from snovault.util import Path
1✔
8

9
from .base import (
1✔
10
    Item,
11
    paths_filtered_by_status
12
)
13

14
from datetime import datetime
1✔
15

16

17
def get_donors_from_samples(request, samples):
    """Return the deduplicated donors referenced by the given samples.

    Each sample path is embedded as a plain object and its ``donors``
    array (if any) is merged into the result. Ordering of the returned
    list is unspecified, matching set semantics.
    """
    unique_donors = set()
    for sample_path in samples:
        sample_obj = request.embed(sample_path, '@@object')
        unique_donors.update(sample_obj.get('donors', []))
    return list(unique_donors)
22

23

24
def get_fileset_objs_from_input_file_sets(request, input_file_sets):
    '''Get file set objects from an array of input file sets.

    Embeds each path in ``input_file_sets`` as a plain object and returns
    the objects in input order. Returns an empty list for ``None`` input.
    '''
    # Guard clause + comprehension instead of the manual append loop.
    if input_file_sets is None:
        return []
    return [request.embed(fileset, '@@object') for fileset in input_file_sets]
31

32

33
def get_file_objs_from_files(request, files):
    '''Get file objects from an array of files.

    Embeds each file path without calculated properties and returns the
    objects in input order. Returns an empty list for ``None`` input.
    '''
    # Guard clause + comprehension instead of the manual append loop.
    if files is None:
        return []
    return [
        request.embed(file, '@@object?skip_calculated=true')
        for file in files
    ]
40

41

42
def get_assessed_gene_symbols(request, assessed_genes=None):
    '''Get gene symbols from an array of assessed genes.

    Embeds each gene and collects its ``symbol`` (empty string when the
    gene object has no symbol). Returns a sorted, comma-separated string,
    or '' when no genes are supplied.
    '''
    # Guard clause + comprehension instead of the manual append loop.
    if not assessed_genes:
        return ''
    gene_symbols = [
        request.embed(gene, '@@object?skip_calculated=true').get('symbol', '')
        for gene in assessed_genes
    ]
    return ', '.join(sorted(gene_symbols))
52

53

54
def get_assessed_gene_phrase(request, assessed_genes=None):
    """Describe assessed genes: symbols when 5 or fewer, otherwise a count.

    With more than 5 genes, returns '<N> assessed genes'; otherwise
    delegates to get_assessed_gene_symbols for a sorted, comma-separated
    symbol list (empty string when no genes were given).
    """
    total = len(assessed_genes) if assessed_genes else 0
    if total > 5:
        return f'{total} assessed genes'
    return get_assessed_gene_symbols(request, assessed_genes)
63

64

65
def get_cls_phrase(cls_set):
    """Build an 'integrating ...' phrase from construct library set summaries.

    Summaries are sorted, given an indefinite article, and joined with
    natural-language punctuation. Three or more editing-template-library
    summaries with identical selection criteria are condensed into a
    targeton-count phrase per gene (the SGE case: >20 CLS per analysis set).
    Assumes ``cls_set`` is non-empty — an empty input raises at the final
    formatting step, and the caller guards against it.
    """
    phrases = []
    for summary in sorted(cls_set):
        # NOTE(review): only lowercase vowels are checked, although the
        # first character is lowercased below — assumes summaries already
        # start lowercase; confirm against CLS summary generation.
        article = 'an' if summary.startswith(('a', 'e', 'i', 'o', 'u')) else 'a'
        phrases.append(f'{article} {summary[0].lower()}{summary[1:]}')
    count = len(phrases)
    if count == 1:
        joined = phrases[0]
    elif count == 2:
        joined = ' and '.join(phrases)
    elif count > 2:
        # Special case for SGE assays: >20 CLS under each analysis set.
        if phrases[0].startswith('an editing template library'):
            common = phrases[0].split(' in ')[0]
            # Only condense when every CLS shares the same selection
            # criteria (i.e. sequence variants).
            if all(phrase.startswith(common) for phrase in phrases):
                targetons_by_gene = {}
                for phrase in phrases:
                    # e.g. "...targeting sequence variants in exon6B of BARD1"
                    words = phrase.split(' ')
                    targetons_by_gene.setdefault(words[-1], set()).add(words[-3])
                counts = ', '.join(
                    f'in {len(targetons)} targetons of {gene}'
                    for gene, targetons in targetons_by_gene.items()
                )
                joined = (
                    common.replace('an ', '').replace('library', 'libraries')
                    + ' ' + counts
                )
            else:
                joined = ', '.join(phrases[:-1]) + ', and ' + phrases[-1]
        else:
            joined = ', '.join(phrases[:-1]) + ', and ' + phrases[-1]
    return f'integrating {joined}'
103

104

105
@abstract_collection(
    name='file-sets',
    unique_key='accession',
    properties={
        'title': 'File Sets',
        'description': 'Listing of file sets',
    }
)
class FileSet(Item):
    """Abstract base item for all file set types.

    Declares the shared schema, reverse links, embedding paths, audit
    inheritance and status propagation, plus calculated properties common
    to every concrete file set subclass (e.g. AnalysisSet).
    """
    item_type = 'file_set'
    base_types = ['FileSet'] + Item.base_types
    name_key = 'accession'
    schema = load_schema('igvfd:schemas/file_set.json')
    # Reverse links: items of these types pointing back at this file set.
    rev = {
        'files': ('File', 'file_set'),
        'control_for': ('FileSet', 'control_file_sets'),
        'input_for': ('FileSet', 'input_file_sets')
    }
    embedded_with_frame = [
        Path('award.contact_pi', include=['@id', 'contact_pi', 'component', 'title']),
        Path('lab', include=['@id', 'title']),
        Path('submitted_by', include=['@id', 'title']),
        # NOTE(review): 'submitted_file_name' appears twice in this include
        # list — presumably unintentional duplication; harmless but worth
        # cleaning up.
        Path('files', include=['@id', 'accession', 'aliases', 'assembly', 'content_type', 'controlled_access', 'derived_from',
             'file_format', 'file_size', 'href', 'md5sum', 's3_uri', 'submitted_file_name', 'status', 'transcriptome_annotation',
                               'creation_timestamp', 'sequencing_platform', 'upload_status', 'submitted_file_name']),
        Path('control_for', include=['@id', 'accession', 'aliases', 'status']),
        Path('donors', include=['@id', 'accession', 'aliases', 'sex', 'status', 'strain_background', 'taxa']),
        # NOTE(review): 'treatments' appears twice in this include list.
        Path('samples.sample_terms', include=[
            '@id',
            '@type',
            'accession',
            'aliases',
            'treatments',
            'cellular_sub_pool',
            'classifications',
            'disease_terms',
            'modifications',
            'sample_terms',
            'status',
            'summary',
            'targeted_sample_term',
            'taxa',
            'term_name',
            'treatments',
            'institutional_certificates',
        ]),
        Path('samples.disease_terms', include=['@id', 'term_name', 'status']),
        Path('samples.targeted_sample_term', include=['@id', 'term_name', 'status']),
        Path('samples.modifications', include=['@id', 'modality', 'status']),
        Path('samples.treatments', include=['@id', 'treatment_term_name',
             'purpose', 'treatment_type', 'summary', 'status']),
        Path('samples.institutional_certificates', include=['@id',
             'certificate_identifier', 'status', 'data_use_limitation', 'data_use_limitation_modifiers', 'controlled_access']),
        Path('construct_library_sets.integrated_content_files', include=[
             '@id', 'accession', 'file_set_type', 'summary', 'status', 'content_type', 'integrated_content_files']),
        Path('publications', include=['@id', 'publication_identifiers', 'status']),
    ]

    # Audits on these linked items are surfaced on the file set itself.
    audit_inherit = [
        'award',
        'lab',
        'files',
        'documents',
        'samples',
        'samples.sample_terms',
        'samples.disease_terms',
        'samples.treatments',
        'samples.modifications',
        'donors',
    ]

    # Status changes propagate up to these linked items; nothing propagates down.
    set_status_up = [
        'documents',
        'files',
        'input_file_sets',
        'samples'
    ]
    set_status_down = [
    ]

    @calculated_property(schema={
        'title': 'Files',
        'type': 'array',
        'description': 'The files associated with this file set.',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'File',
            'type': ['string', 'object'],
            'linkFrom': 'File.file_set',
        },
        'notSubmittable': True
    })
    def files(self, request, files):
        """Rev-linked files, filtered to statuses visible to the requester."""
        return paths_filtered_by_status(request, files)

    @calculated_property(schema={
        'title': 'File Sets Controlled By This File Set',
        'type': 'array',
        'description': 'The file sets for which this file set is a control.',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'File Set Controlled By This File Set',
            'type': ['string', 'object'],
            'linkFrom': 'FileSet.control_file_sets',
        },
        'notSubmittable': True
    })
    def control_for(self, request, control_for):
        """Rev-linked controlled file sets, filtered by status."""
        return paths_filtered_by_status(request, control_for)

    @calculated_property(schema={
        'title': 'Submitted Files Timestamp',
        'description': 'The timestamp the first file object in the file_set or associated auxiliary sets was created.',
        'comment': 'Do not submit. The timestamp is automatically calculated.',
        'type': 'string',
        'format': 'date-time',
        'notSubmittable': True
    })
    def submitted_files_timestamp(self, request, files, auxiliary_sets=[]):
        """Return the earliest creation_timestamp across this set's files
        and the files of its auxiliary sets, or None when no file has one.

        NOTE(review): mutable default argument; it is only iterated, never
        mutated, so this is harmless, but ``None`` would be safer.
        """
        timestamps = set()
        files_to_traverse = []
        if files:
            files_to_traverse.extend(files)
        if auxiliary_sets:
            for auxiliary_set in auxiliary_sets:
                aux_set_object = request.embed(auxiliary_set, '@@object_with_select_calculated_properties?field=files')
                if 'files' in aux_set_object:
                    files_to_traverse.extend(aux_set_object['files'])
        for current_file_path in files_to_traverse:
            file_object = request.embed(current_file_path, '@@object?skip_calculated=true')
            timestamp = file_object.get('creation_timestamp', None)
            if timestamp:
                timestamps.add(timestamp)
        if timestamps:
            # Sort chronologically by parsing the ISO-8601 timestamp with
            # microseconds and timezone offset; earliest wins.
            res = sorted(timestamps, key=lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S.%f%z'))
            return res[0]

    @calculated_property(schema={
        'title': 'Input For',
        'description': 'The file sets that use this file set as an input.',
        'type': 'array',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'Input For',
            'type': ['string', 'object'],
            'linkFrom': 'FileSet.input_file_sets',
        },
        'notSubmittable': True
    })
    def input_for(self, request, input_for):
        """Rev-linked downstream file sets, filtered by status."""
        return paths_filtered_by_status(request, input_for)

    @calculated_property(
        define=True,
        condition='samples',
        schema={
            'title': 'Construct Library Sets',
            'description': 'The construct library sets associated with the samples of this file set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Construct Library Set',
                'description': 'A construct library set associated with a sample of this file set.',
                'type': 'string',
                'linkTo': 'FileSet',
            },
            'notSubmittable': True
        })
    def construct_library_sets(self, request, samples=None):
        """Union of construct_library_sets across this set's samples.

        Returns a list when any were found; implicitly returns None
        otherwise (property omitted from the rendered object).
        """
        construct_library_sets = set()
        for sample in samples:
            sample_object = request.embed(sample,
                                          '@@object_with_select_calculated_properties?'
                                          'field=construct_library_sets'
                                          )
            if sample_object.get('construct_library_sets', []):
                construct_library_sets = construct_library_sets | set(sample_object.get('construct_library_sets', []))
        if construct_library_sets:
            return list(construct_library_sets)

    @calculated_property(
        condition='samples',
        schema={
            'title': 'Data Use Limitation Summaries',
            'description': 'The data use limitation summaries of institutional certificates covering the sample associated with this file set which are signed by the same lab (or their partner lab) as the lab that submitted this file set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Data Use Limitation Summary',
                'description': 'A combination of the data use limitation and its modifiers.',
                'type': 'string'
            },
            'notSubmittable': True,
        }
    )
    def data_use_limitation_summaries(self, request, lab, samples=None):
        """Collect data_use_limitation_summary values from institutional
        certificates on this set's samples, keeping only certificates whose
        lab (or a partner lab) matches this file set's submitting lab.

        Falls back to ['no certificate'] when no matching certificate exists.
        """
        summaries_to_return = []
        for sample in samples:
            sample_object = request.embed(
                sample, '@@object_with_select_calculated_properties?field=institutional_certificates')
            for ic in sample_object.get('institutional_certificates', []):
                ic_object = request.embed(
                    ic, '@@object_with_select_calculated_properties?field=data_use_limitation_summary')
                ic_labs = [ic_object.get('lab', None)] + ic_object.get('partner_labs', [])
                if lab in ic_labs:
                    summaries_to_return.append(ic_object.get('data_use_limitation_summary', None))
        if summaries_to_return:
            return list(set(summaries_to_return))
        else:
            return ['no certificate']

    @calculated_property(
        condition='samples',
        schema={
            'title': 'Controlled Access',
            'description': 'The controlled access of the institutional certificates covering the sample associated with this file set which are signed by the same lab (or their partner lab) as the lab that submitted this file set.',
            'type': 'boolean',
            'notSubmittable': True,
        }
    )
    def controlled_access(self, request, lab, samples=None):
        """True if any matching institutional certificate flags controlled
        access, False when matching certificates exist but none do.

        Certificates match when their lab (or a partner lab) equals this
        file set's submitting lab. Implicitly returns None when no
        certificates match (property omitted).
        """
        controlled_access_to_return = []
        for sample in samples:
            sample_object = request.embed(
                sample, '@@object_with_select_calculated_properties?field=institutional_certificates')
            for ic in sample_object.get('institutional_certificates', []):
                ic_object = request.embed(ic, '@@object?skip_calculated=true')
                ic_labs = [ic_object.get('lab', None)] + ic_object.get('partner_labs', [])
                if lab in ic_labs:
                    controlled_access_to_return.append(ic_object.get('controlled_access'))
        if controlled_access_to_return:
            if any(controlled_access_to_return):
                return True
            else:
                return False
345

346

347
@collection(
1✔
348
    name='analysis-sets',
349
    unique_key='accession',
350
    properties={
351
        'title': 'Analysis Sets',
352
        'description': 'Listing of analysis sets',
353
    }
354
)
355
class AnalysisSet(FileSet):
1✔
356
    item_type = 'analysis_set'
1✔
357
    schema = load_schema('igvfd:schemas/analysis_set.json')
1✔
358
    embedded_with_frame = FileSet.embedded_with_frame + [
1✔
359
        Path('input_file_sets', include=['@id', 'accession', 'aliases', 'file_set_type', 'status']),
360
        Path('functional_assay_mechanisms', include=['@id', 'term_id', 'term_name', 'status']),
361
        Path('workflows', include=['@id', 'accession', 'name', 'uniform_pipeline', 'status'])
362
    ]
363
    audit_inherit = FileSet.audit_inherit
1✔
364
    set_status_up = FileSet.set_status_up + []
1✔
365
    set_status_down = FileSet.set_status_down + []
1✔
366

367
    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, input_file_sets=[], files=[], samples=[], construct_library_sets=[]):
        """Build a human-readable summary for this analysis set.

        Traverses the input file set graph (following nested analysis sets
        and auxiliary sets' measurement sets) to collect assay titles,
        targeted genes, control types and construct library set summaries,
        then joins them into a single phrase. Falls back to the raw
        file_set_type when nothing was collected.

        NOTE(review): mutable default arguments; they are only read, never
        mutated, so this is harmless but ``None`` would be safer.
        """
        inspected_filesets = set()
        fileset_types = set()
        # NOTE(review): file_content_types is collected below but never used
        # in the final phrase — possibly leftover from an earlier version.
        file_content_types = set()
        targeted_genes = set()
        fileset_subclasses = set()
        assay_terms = set()
        assay_titles = set()
        cls_derived_assay_titles = set()
        crispr_modalities = set()
        cls_type_set = set()
        cls_set = set()
        control_type_set = set()
        # NOTE(review): unspecified_assay is never used.
        unspecified_assay = ''
        # Assay term @ids treated as CRISPR screens when deciding whether to
        # show auxiliary file set types.
        crispr_screen_terms = [
            '/assay-terms/OBI_0003659/',
            '/assay-terms/OBI_0003661/'
        ]
        if input_file_sets:
            # The file_set_types are included based on the subclass
            # of only the directly associated input_file_sets, not
            # on the subclass of all file sets that are checked.
            for directly_linked_input in input_file_sets:
                fileset_object = request.embed(
                    directly_linked_input,
                    '@@object_with_select_calculated_properties?field=@type'
                )
                fileset_subclasses.add(fileset_object['@type'][0])
            # Breadth-unordered traversal of the input graph; inspected_filesets
            # prevents revisiting (and cycles).
            filesets_to_inspect = set(input_file_sets.copy())
            while filesets_to_inspect:
                input_fileset = filesets_to_inspect.pop()
                if input_fileset not in inspected_filesets:
                    inspected_filesets.add(input_fileset)
                    fileset_object = request.embed(
                        input_fileset,
                        '@@object_with_select_calculated_properties?'
                        'field=@type&field=file_set_type&field=measurement_sets'
                        '&field=input_file_sets&field=targeted_genes.symbol'
                        '&field=assay_term&field=applied_to_samples'
                    )
                    # Trace back from Analysis Sets to identify their
                    # input file sets.
                    if (input_fileset.startswith('/analysis-sets/') and
                            fileset_object.get('input_file_sets', False)):
                        for candidate_fileset in fileset_object.get('input_file_sets'):
                            if candidate_fileset not in inspected_filesets:
                                filesets_to_inspect.add(candidate_fileset)
                    # Retrieve targeted_genes from Measurement Sets.
                    elif input_fileset.startswith('/measurement-sets/'):
                        if 'targeted_genes' in fileset_object:
                            for gene in fileset_object['targeted_genes']:
                                gene_object = request.embed(gene, '@@object?skip_calculated=true')
                                targeted_genes.add(gene_object['symbol'])
                        assay_terms.add(fileset_object['assay_term'])
                        # NOTE(review): direct key access assumes measurement
                        # sets always carry preferred_assay_title — confirm
                        # against the measurement_set schema.
                        if fileset_object['preferred_assay_title'] in ['10x multiome', '10x multiome with MULTI-seq', 'SHARE-seq']:
                            # For multiplexed platforms, prefix with the assay
                            # ontology term name for disambiguation.
                            assay_term_object = request.embed(
                                fileset_object['assay_term'], '@@object?skip_calculated=true')
                            assay_term_name = assay_term_object.get('term_name', '')
                            assay_titles.add(f"{assay_term_name} ({fileset_object['preferred_assay_title']})")
                        else:
                            assay_titles.add(fileset_object['preferred_assay_title'])
                    # Retrieve Measurement Sets associated with Auxiliary Sets.
                    elif input_fileset.startswith('/auxiliary-sets/'):
                        fileset_types.add(fileset_object['file_set_type'])
                        if 'measurement_sets' in fileset_object:
                            for candidate_fileset in fileset_object.get('measurement_sets'):
                                measurement_set_object = request.embed(
                                    candidate_fileset, '@@object?skip_calculated=true')
                                assay_terms.add(measurement_set_object['assay_term'])
                                if measurement_set_object['preferred_assay_title'] in ['10x multiome', '10x multiome with MULTI-seq', 'SHARE-seq']:
                                    assay_term_object = request.embed(
                                        measurement_set_object['assay_term'], '@@object?skip_calculated=true')
                                    assay_term_name = assay_term_object.get('term_name', '')
                                    assay_titles.add(
                                        f"{assay_term_name} ({measurement_set_object['preferred_assay_title']})")
                                else:
                                    assay_titles.add(measurement_set_object['preferred_assay_title'])
                                if candidate_fileset not in inspected_filesets:
                                    filesets_to_inspect.add(candidate_fileset)
                    # Derive assay titles from the file sets of the samples a
                    # construct library set was applied to.
                    elif input_fileset.startswith('/construct-library-sets/'):
                        for sample in fileset_object.get('applied_to_samples', []):
                            sample_object = request.embed(
                                sample, '@@object_with_select_calculated_properties?field=file_sets')
                            for file_set in sample_object.get('file_sets', []):
                                file_set_object = request.embed(
                                    file_set, '@@object_with_select_calculated_properties?field=preferred_assay_title')
                                if file_set_object.get('preferred_assay_title'):
                                    if file_set_object['preferred_assay_title'] in ['10x multiome', '10x multiome with MULTI-seq', 'SHARE-seq']:
                                        assay_term_object = request.embed(
                                            file_set_object['assay_term'], '@@object?skip_calculated=true')
                                        assay_term_name = assay_term_object.get('term_name', '')
                                        cls_derived_assay_titles.add(
                                            f"{assay_term_name} ({file_set_object['preferred_assay_title']})")
                                    else:
                                        cls_derived_assay_titles.add(file_set_object['preferred_assay_title'])
                    # Any other non-analysis-set input contributes its
                    # file_set_type directly.
                    elif not input_fileset.startswith('/analysis-sets/'):
                        fileset_types.add(fileset_object['file_set_type'])
                    # Collect control types.
                    if 'control_type' in fileset_object:
                        control_type_set.add(fileset_object['control_type'])

        # Collect content_types of files.
        if files:
            for file in files:
                file_object = request.embed(file, '@@object?skip_calculated=true')
                file_content_types.add(file_object['content_type'])

        # Collect CRISPR modalities and file set type from associated samples.
        if samples:
            for sample in samples:
                sample_object = request.embed(sample, '@@object?skip_calculated=true')
                if 'modifications' in sample_object:
                    for modification in sample_object['modifications']:
                        modification_object = request.embed(modification, '@@object?skip_calculated=true')
                        crispr_modalities.add(modification_object['modality'])
        # Collect construct library set summaries and types.
        # Prefer the calculated construct_library_sets; otherwise, when the
        # inputs are exclusively ConstructLibrarySets, use the inputs directly.
        prop_with_cls = None
        if construct_library_sets:
            prop_with_cls = construct_library_sets
        elif len(fileset_subclasses) == 1 and ('ConstructLibrarySet' in fileset_subclasses):
            prop_with_cls = input_file_sets
        if prop_with_cls:
            for construct_library_set in prop_with_cls:
                construct_library_set_object = request.embed(
                    construct_library_set, '@@object_with_select_calculated_properties?field=summary')
                cls_type_set.add(construct_library_set_object['file_set_type'])
                cls_set.add(construct_library_set_object['summary'])
        cls_phrase = ''
        if len(cls_set) > 0:
            cls_phrase = get_cls_phrase(cls_set)

        # Assay titles if there are input file sets, otherwise unspecified.
        # Only use the CLS derived assay titles if there were no other assay titles.
        if cls_derived_assay_titles and not assay_titles:
            assay_titles = cls_derived_assay_titles
        assay_title_phrase = 'Unspecified assay'
        if assay_titles:
            assay_title_phrase = ', '.join(sorted(assay_titles))
        # Guide libraries imply a CRISPR assay; avoid double-prefixing.
        if 'guide library' in cls_type_set:
            if 'CRISPR' not in assay_title_phrase:
                assay_title_phrase = f'CRISPR {assay_title_phrase}'
        # Add modalities to the assay titles.
        if crispr_modalities:
            if len(crispr_modalities) > 1:
                modality_set = ', '.join(crispr_modalities)
            elif len(crispr_modalities) == 1:
                modality_set = ''.join(crispr_modalities)
            if 'CRISPR' in assay_title_phrase:
                assay_title_phrase = assay_title_phrase.replace('CRISPR', f'CRISPR {modality_set}')
            else:
                assay_title_phrase = f'{modality_set} {assay_title_phrase}'
        # Targeted genes.
        targeted_genes_phrase = ''
        if targeted_genes:
            targeted_genes_phrase = f'targeting {", ".join(targeted_genes)}'
        # The file set types are only shown if the inputs are all Auxiliary Sets
        # and the Measurement Sets related to the Auxiliary Sets are not CRISPR screens.
        file_set_type_phrase = ''
        if fileset_types and len(fileset_subclasses) == 1 and ('AuxiliarySet' in fileset_subclasses):
            if not (assay_terms and all(x in crispr_screen_terms for x in assay_terms)):
                file_set_type_phrase = ', '.join(fileset_types)

        control_phrase = ''
        if len(control_type_set) > 0:
            suffix = ''
            if len(control_type_set) > 1:
                suffix = 's'
            control_phrase = f'with {", ".join(sorted(control_type_set))} control{suffix}'

        # Join the non-empty pieces; normalize spacing around colons.
        all_phrases = [
            assay_title_phrase,
            targeted_genes_phrase,
            cls_phrase,
            file_set_type_phrase,
            control_phrase
        ]
        merged_phrase = ' '.join([x for x in all_phrases if x != '']).replace(' : ', ': ')
        if merged_phrase:
            return merged_phrase
        else:
            # Failsafe return value.
            return file_set_type
556

557
    @calculated_property(
        define=True,
        schema={
            'title': 'Assay Titles',
            'description': 'Title(s) of assays that produced data analyzed in the analysis set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Assay Title',
                'description': 'Title of assay that produced data analyzed in the analysis set.',
                'type': 'string'
            },
            'notSubmittable': True,
        }
    )
    def assay_titles(self, request, input_file_sets=None):
        """Collect preferred assay titles from this analysis set's inputs.

        Sources, by input type: MeasurementSet's own preferred_assay_title;
        a nested AnalysisSet's calculated assay_titles; the measurement sets
        of an AuxiliarySet; and — only when every input is a
        ConstructLibrarySet — the file sets of the samples the library was
        applied to. Returns a list (implicitly None when input_file_sets
        is None, so the property is omitted).
        """
        assay_titles = set()
        if input_file_sets is not None:
            # CLS-derived titles are only used when the inputs are
            # exclusively construct library sets.
            only_construct_library_sets = False
            if all(input_file_set.startswith('/construct-library-sets/') for input_file_set in input_file_sets):
                only_construct_library_sets = True
            for fileset in input_file_sets:
                file_set_object = request.embed(fileset, '@@object')
                if 'MeasurementSet' in file_set_object.get('@type'):
                    preferred_assay_title = file_set_object.get('preferred_assay_title')
                    if preferred_assay_title:
                        assay_titles.add(preferred_assay_title)
                elif 'AnalysisSet' in file_set_object.get('@type'):
                    # Inherit the nested analysis set's calculated titles.
                    input_analysis_assay_titles = set(file_set_object.get('assay_titles', []))
                    if input_analysis_assay_titles:
                        assay_titles = assay_titles | input_analysis_assay_titles
                elif 'AuxiliarySet' in file_set_object.get('@type'):
                    # NOTE(review): .get without a default — assumes auxiliary
                    # sets always have measurement_sets; confirm, otherwise
                    # this iterates None.
                    for measurement_set in file_set_object.get('measurement_sets'):
                        measurement_set_object = request.embed(measurement_set, '@@object')
                        preferred_assay_title = measurement_set_object.get('preferred_assay_title')
                        if preferred_assay_title:
                            assay_titles.add(preferred_assay_title)
                elif 'ConstructLibrarySet' in file_set_object.get('@type') and only_construct_library_sets:
                    for sample in file_set_object.get('applied_to_samples', []):
                        sample_object = request.embed(
                            sample, '@@object_with_select_calculated_properties?field=file_sets')
                        # NOTE(review): this rebinds file_set_object (the loop
                        # variable's object) — safe here because it is not
                        # reused afterwards, but fragile.
                        for file_set in sample_object.get('file_sets', []):
                            file_set_object = request.embed(
                                file_set, '@@object_with_select_calculated_properties?field=preferred_assay_title')
                            preferred_assay_title = file_set_object.get('preferred_assay_title')
                            if preferred_assay_title:
                                assay_titles.add(preferred_assay_title)
            return list(assay_titles)
606

607
    @calculated_property(
1✔
608
        condition='input_file_sets',
609
        define=True,
610
        schema={
611
            'title': 'Samples',
612
            'description': 'Samples associated with this analysis set.',
613
            'type': 'array',
614
            'minItems': 1,
615
            'uniqueItems': True,
616
            'items': {
617
                'title': 'Sample',
618
                'description': 'Sample associated with this analysis set.',
619
                'type': 'string',
620
                'linkTo': 'Sample'
621
            },
622
            'notSubmittable': True,
623
        }
624
    )
625
    def samples(self, request, input_file_sets=None, demultiplexed_samples=None):
        """Collect the unique samples of all input file sets.

        Returns the union of every input file set's ``samples``. If
        ``demultiplexed_samples`` is given and every collected sample is a
        multiplexed sample, the demultiplexed samples are returned instead.
        Returns None when ``input_file_sets`` is None.
        """
        if input_file_sets is None:
            return None
        collected = set()
        for input_file_set in input_file_sets:
            file_set_object = request.embed(input_file_set, '@@object')
            collected.update(file_set_object.get('samples', []))
        sample_list = list(collected)
        if demultiplexed_samples and all(
            sample.startswith('/multiplexed-samples/') for sample in sample_list
        ):
            # The analysis set names demultiplexed samples and all input
            # data is multiplexed: report just the demultiplexed samples.
            return demultiplexed_samples
        return sample_list
1✔
639

640
    @calculated_property(
1✔
641
        condition='samples',
642
        schema={
643
            'title': 'Donors',
644
            'description': 'The donors of the samples associated with this analysis set.',
645
            'type': 'array',
646
            'minItems': 1,
647
            'uniqueItems': True,
648
            'items': {
649
                'title': 'Donor',
650
                'description': 'Donor of a sample associated with this analysis set.',
651
                'type': 'string',
652
                'linkTo': 'Donor'
653
            },
654
            'notSubmittable': True,
655
        }
656
    )
657
    def donors(self, request, samples=None):
        """Return the unique donors across the samples of this analysis set."""
        donor_ids = get_donors_from_samples(request, samples)
        return donor_ids
1✔
659

660
    @calculated_property(
1✔
661
        schema={
662
            'title': 'Protocols',
663
            'description': 'Links to the protocol(s) for conducting the assay on Protocols.io.',
664
            'type': 'array',
665
            'minItems': 1,
666
            'uniqueItems': True,
667
            'items': {
668
                'title': 'Protocol',
669
                'type': 'string',
670
                'pattern': '^https://www\\.protocols\\.io/(\\S+)$'
671
            },
672
            'notSubmittable': True
673
        }
674
    )
675
    def protocols(self, request, input_file_sets=None):
        """Calculate an array of unique protocols for all measurement sets associated with an analysis set."""
        unique_protocols = set()
        for file_set_obj in get_fileset_objs_from_input_file_sets(request=request, input_file_sets=input_file_sets):
            # Only measurement sets carry protocols; skip every other type.
            if 'MeasurementSet' not in file_set_obj.get('@type'):
                continue
            unique_protocols.update(file_set_obj.get('protocols', []))
        return list(unique_protocols)
1✔
685

686
    @calculated_property(
1✔
687
        condition='samples',
688
        schema={
689
            'title': 'Simplified Sample Summary',
690
            'description': 'A summary of the samples associated with input file sets of this analysis set.',
691
            'type': 'string',
692
            'notSubmittable': True,
693
        }
694
    )
695
    def sample_summary(self, request, samples=None):
        """Build a human-readable summary of the samples behind this analysis set.

        The summary has the shape:
        ``<taxa> <sample terms grouped by classification>[, <differentiation
        time>][, <treatments>][, <modifications>][, <construct library
        set>][, <sorting>][, <cellular sub pools>]``.
        """
        # Accumulators for each fragment of the final sentence.
        taxa = set()
        # classification string -> set of sample-term phrases for that classification.
        sample_classification_term_target = dict()
        treatment_purposes = set()
        treatment_summaries = set()
        differentiation_times = set()
        construct_library_set_types = set()
        modification_summaries = set()
        # Non-empty iff any sample was sorted from another sample.
        sorted_from = set()
        targeted_genes_for_sorting = set()
        cellular_sub_pools = set()

        # Maps a treatment's purpose to the adjective used in the summary.
        treatment_purpose_to_adjective = {
            'activation': 'activated',
            'agonist': 'agonized',
            'antagonist': 'antagonized',
            'control': 'treated with a control',
            'differentiation': 'differentiated',
            'de-differentiation': 'de-differentiated',
            'perturbation': 'perturbed',
            'selection': 'selected',
            'stimulation': 'stimulated'
        }

        # Known pairs of classifications that collapse into a single phrase.
        two_classification_cases = {
            'differentiated cell specimen, pooled cell specimen': ['pooled differentiated cell specimen'],
            'pooled cell specimen, reprogrammed cell specimen': ['pooled reprogrammed cell specimen'],
            'cell line, pooled cell specimen': ['pooled cell specimen']
        }

        # Classifications that are expressed as a prefix on the term name
        # (e.g. "differentiated <term>") instead of a suffix.
        classification_to_prefix = {
            'differentiated cell specimen': 'differentiated',
            'reprogrammed cell specimen': 'reprogrammed',
            'pooled differentiated cell specimen': 'pooled differentiated',
            'pooled reprogrammed cell specimen': 'pooled reprogrammed'
        }

        for sample in samples:
            sample_object = request.embed(sample, '@@object')

            taxa.add(sample_object.get('taxa', ''))

            # Group sample and targeted sample terms according to classification.
            # Other metadata such as treatment info are lumped together.
            mux_prefix = ''
            sample_classifications = sorted(sample_object['classifications'])
            if 'multiplexed sample' in sample_object['classifications']:
                sample_classifications.remove('multiplexed sample')
                mux_prefix = 'multiplexed sample of '
            if ', '.join(sorted(sample_classifications)) in two_classification_cases:
                sample_classifications = two_classification_cases[', '.join(sorted(sample_classifications))]
            # The variable "classification" can potentially be very long for
            # a Multiplexed Sample, but it will be entirely dropped for
            # Multiplexed Sample in the end - so it is ok.
            classification = f"{mux_prefix}{' and '.join(sample_classifications)}"
            if classification not in sample_classification_term_target:
                sample_classification_term_target[classification] = set()

            for term in sample_object['sample_terms']:
                sample_term_object = request.embed(term, '@@object?skip_calculated=true')
                sample_phrase = f"{sample_term_object['term_name']}"
                # Avoid redundancy of classification and term name
                # e.g. "HFF-1 cell cell line"
                if not classification.startswith('multiplexed sample of'):
                    if sample_phrase.endswith('cell') and 'cell' in classification:
                        sample_phrase = sample_phrase.replace('cell', classification)
                    elif sample_phrase.endswith(' gastruloid') and 'gastruloid' in classification:
                        sample_phrase = sample_phrase.replace(' gastruloid', '')
                    elif 'cell' in sample_phrase and classification in classification_to_prefix:
                        sample_phrase = f'{classification_to_prefix[classification]} {sample_phrase}'

                targeted_sample_suffix = ''
                if 'targeted_sample_term' in sample_object:
                    targeted_sample_term_object = request.embed(
                        sample_object['targeted_sample_term'], '@@object?skip_calculated=true')
                    targeted_sample_suffix = f"induced to {targeted_sample_term_object['term_name']}"
                if targeted_sample_suffix:
                    sample_phrase = f'{sample_phrase} {targeted_sample_suffix}'
                sample_classification_term_target[classification].add(sample_phrase)

            # Time elapsed since differentiation/reprogramming, if recorded.
            if 'time_post_change' in sample_object:
                time = sample_object['time_post_change']
                time_unit = sample_object['time_post_change_units']
                differentiation_times.add(f'{time} {time_unit}')
            if 'modifications' in sample_object:
                for modification in sample_object['modifications']:
                    modification_object = request.embed(
                        modification, '@@object_with_select_calculated_properties?field=summary')
                    modification_summaries.add(modification_object['summary'])
            if 'construct_library_sets' in sample_object:
                for construct_library_set in sample_object['construct_library_sets']:
                    cls_object = request.embed(construct_library_set, '@@object?skip_calculated=true')
                    construct_library_set_types.add(cls_object['file_set_type'])
            if 'sorted_from' in sample_object:
                sorted_from.add(True)
                # Find the genes whose expression the sorting was gated on,
                # via the sample's measurement sets.
                for file_set in sample_object['file_sets']:
                    if file_set.startswith('/measurement-sets/'):
                        fileset_object = request.embed(file_set, '@@object?skip_calculated=true')
                        if 'targeted_genes' in fileset_object:
                            for gene in fileset_object['targeted_genes']:
                                gene_object = request.embed(gene, '@@object?skip_calculated=true')
                                targeted_genes_for_sorting.add(gene_object['symbol'])
            if 'treatments' in sample_object:
                for treatment in sample_object['treatments']:
                    treatment_object = request.embed(
                        treatment, '@@object_with_select_calculated_properties?field=summary')
                    treatment_purposes.add(treatment_purpose_to_adjective.get(treatment_object['purpose'], ''))
                    # NOTE(review): assumes every treatment summary contains
                    # ' of '; [1] would raise IndexError otherwise — confirm
                    # against the Treatment summary format.
                    truncated_summary = treatment_object['summary'].split(' of ')[1]
                    treatment_summaries.add(truncated_summary)
            if 'cellular_sub_pool' in sample_object:
                cellular_sub_pools.add(sample_object['cellular_sub_pool'])

        # Turn each classification group into one phrase.
        all_sample_terms = []
        for classification in sorted(sample_classification_term_target.keys()):
            terms_by_classification = f"{', '.join(sorted(sample_classification_term_target[classification]))}"
            # Put the terms after the "multiplexed sample of" and drop
            # the underlying classifications
            if 'multiplexed sample of' in classification:
                terms_by_classification = f'multiplexed sample of {terms_by_classification}'
            # Differentiated, reprogrammed, pooled cell specimen can be merged
            # into the terms_by_classification before this. Therefore we don't
            # want to append it to the terms_by_classification a second time.
            elif not any(x in terms_by_classification for x in [
                    'differentiated cell specimen', 'reprogrammed cell specimen', 'pooled cell specimen', 'primary cell']
            ):
                # Insert the classification before the targeted_sample_term if it exists.
                if 'induced to' in terms_by_classification:
                    terms_by_classification = terms_by_classification.replace(
                        'induced to', f'{classification} induced to'
                    )
                else:
                    terms_by_classification = f'{terms_by_classification} {classification}'
            elif any(x in terms_by_classification for x in [
                    'differentiated cell specimen', 'reprogrammed cell specimen', 'pooled cell specimen', 'primary cell']
            ):
                # Don't add anything when the classification was already in
                # the terms_by_classification.
                terms_by_classification = f'{terms_by_classification}'
            # Failsafe case.
            else:
                terms_by_classification = f'{terms_by_classification} {classification}'

            all_sample_terms.append(terms_by_classification)

        # Assemble the optional trailing phrases.
        differentiation_time_phrase = ''
        if differentiation_times:
            differentiation_time_phrase = f'at {", ".join(sorted(differentiation_times))}(s) post change'
        treatments_phrase = ''
        if treatment_purposes and treatment_summaries:
            treatments_phrase = f"{', '.join(sorted(treatment_purposes))} with {', '.join(sorted(treatment_summaries))}"
        modification_summary_phrase = ''
        if modification_summaries:
            modification_summaries = sorted(modification_summaries)
            modification_summary_phrase = f'modified with {", ".join(modification_summaries)}'
        construct_library_set_type_phrase = ''
        if construct_library_set_types:
            # NOTE(review): unlike the other phrases this join is not sorted,
            # so ordering can vary across processes — consider sorted().
            construct_library_set_type_phrase = f'transfected with a {", ".join(construct_library_set_types)}'
        sorted_phrase = ''
        if sorted_from:
            if targeted_genes_for_sorting:
                # NOTE(review): unsorted set join — nondeterministic order.
                sorted_phrase = f'sorted on expression of {", ".join(targeted_genes_for_sorting)}'
            else:
                sorted_phrase = f'sorted into bins'
        cellular_sub_pool_phrase = ''
        if cellular_sub_pools:
            cellular_sub_pool_phrase = f'cellular sub pool(s): {", ".join(sorted(cellular_sub_pools))}'

        additional_phrases = [
            differentiation_time_phrase,
            treatments_phrase,
            modification_summary_phrase,
            construct_library_set_type_phrase,
            sorted_phrase,
            cellular_sub_pool_phrase
        ]
        additional_phrases_joined = ', '.join([x for x in additional_phrases if x != ''])
        additional_phrase_suffix = ''
        if additional_phrases_joined:
            additional_phrase_suffix = f', {additional_phrases_joined}'
        # NOTE(review): taxa is an unsorted set join as well.
        summary = f"{', '.join(taxa)} {', '.join(all_sample_terms)}{additional_phrase_suffix}"

        return summary
1✔
877

878
    @calculated_property(
1✔
879
        schema={
880
            'title': 'Functional Assay Mechanisms',
881
            'description': 'The biological processes measured by the functional assays.',
882
            'type': 'array',
883
            'minItems': 1,
884
            'uniqueItems': True,
885
            'items': {
886
                'title': 'Phenotype Term',
887
                'type': 'string',
888
                'linkTo': 'PhenotypeTerm'
889
            },
890
            'notSubmittable': True
891
        }
892
    )
893
    def functional_assay_mechanisms(self, request, input_file_sets=None):
        """Collect the unique functional assay mechanisms of the measurement
        and analysis sets among the input file sets.
        """
        mechanisms = set()
        for file_set_object in get_fileset_objs_from_input_file_sets(request=request, input_file_sets=input_file_sets):
            item_types = file_set_object.get('@type')
            if 'MeasurementSet' in item_types or 'AnalysisSet' in item_types:
                mechanisms.update(file_set_object.get('functional_assay_mechanisms', []))
        return list(mechanisms)
1✔
900

901
    @calculated_property(
1✔
902
        schema={
903
            'title': 'Workflows',
904
            'description': 'A workflow for computational analysis of genomic data. A workflow is made up of analysis steps.',
905
            'type': 'array',
906
            'notSubmittable': True,
907
            'uniqueItem': True,
908
            'minItems': 1,
909
            'items': {
910
                'title': 'Workflow',
911
                'type': 'string',
912
                'linkTo': 'Workflow'
913
            }
914
        }
915
    )
916
    def workflows(self, request, files=None):
        """Trace each file's analysis step version to its workflow and
        return the unique workflows found.
        """
        workflows_found = set()
        for file_obj in get_file_objs_from_files(request, files):
            # file -> analysis step version -> analysis step -> workflow
            step_version_path = file_obj.get('analysis_step_version')
            if not step_version_path:
                continue
            step_version_obj = request.embed(step_version_path, '@@object?skip_calculated=true')
            step_path = step_version_obj.get('analysis_step')
            if not step_path:
                continue
            step_obj = request.embed(step_path, '@@object?skip_calculated=true')
            workflow_path = step_obj.get('workflow')
            if workflow_path:
                workflows_found.add(workflow_path)
        return list(workflows_found)
1✔
934

935

936
@collection(
    name='curated-sets',
    unique_key='accession',
    properties={
        'title': 'Curated Sets',
        'description': 'Listing of curated sets',
    }
)
class CuratedSet(FileSet):
    """File set for externally curated reference resources."""
    item_type = 'curated_set'
    schema = load_schema('igvfd:schemas/curated_set.json')
    embedded_with_frame = FileSet.embedded_with_frame
    audit_inherit = FileSet.audit_inherit
    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []

    @calculated_property(
        define=True,
        schema={
            'title': 'Assemblies',
            'description': 'The genome assemblies to which the referencing files in the file set are utilizing (e.g., GRCh38).',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Assembly',
                'type': 'string'
            },
            'notSubmittable': True,
        }
    )
    def assemblies(self, request, files=None):
        """Sorted unique assemblies reported by the files in this set, or None."""
        if not files:
            return None
        found = {
            assembly
            for assembly in (
                request.embed(file_path, '@@object?skip_calculated=true').get('assembly')
                for file_path in files
            )
            if assembly
        }
        return sorted(found) if found else None

    @calculated_property(
        define=True,
        schema={
            'title': 'Transcriptome Annotations',
            'description': 'The annotation versions of the reference resource.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Transcriptome Annotation',
                'type': 'string'
            },
            'notSubmittable': True,
        }
    )
    def transcriptome_annotations(self, request, files=None):
        """Sorted unique transcriptome annotations reported by the files in this set, or None."""
        if not files:
            return None
        found = {
            annotation
            for annotation in (
                request.embed(file_path, '@@object?skip_calculated=true').get('transcriptome_annotation')
                for file_path in files
            )
            if annotation
        }
        return sorted(found) if found else None

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, file_set_type, assemblies=None, transcriptome_annotations=None, taxa=None):
        """Compose the summary as: [taxa] [assemblies] [annotations] file_set_type."""
        segments = []
        if taxa:
            segments.append(f'{taxa}')
        if assemblies:
            segments.append(' '.join(assemblies))
        if transcriptome_annotations:
            segments.append(' '.join(transcriptome_annotations))
        segments.append(file_set_type)
        return ' '.join(segments)
1✔
1021

1022

1023
@collection(
    name='measurement-sets',
    unique_key='accession',
    properties={
        'title': 'Measurement Sets',
        'description': 'Listing of measurement sets',
    })
class MeasurementSet(FileSet):
    """File set holding the data produced by a single assay run."""
    item_type = 'measurement_set'
    schema = load_schema('igvfd:schemas/measurement_set.json')
    # Linked objects embedded (restricted to the listed fields) in addition
    # to the frames inherited from FileSet.
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('assay_term', include=['@id', 'term_name', 'assay_slims', 'status']),
        Path('control_file_sets', include=['@id', 'accession', 'aliases', 'status']),
        Path('related_multiome_datasets', include=['@id', 'accession', 'status']),
        Path('auxiliary_sets', include=['@id', 'accession', 'aliases', 'file_set_type', 'status']),
        Path('construct_library_sets.small_scale_gene_list', include=[
             '@id', 'small_scale_gene_list', 'summary', 'geneid', 'symbol', 'name', 'status']),
        Path('files.sequencing_platform', include=['@id', 'term_name', 'status']),
        Path('targeted_genes', include=['@id', 'geneid', 'symbol', 'name', 'synonyms', 'status']),
        Path('functional_assay_mechanisms', include=['@id', 'term_id', 'term_name', 'status'])
    ]

    # Audits of these linked objects are also surfaced on this item.
    audit_inherit = FileSet.audit_inherit + [
        'auxiliary_sets',
        'assay_term'
    ]

    # Status propagation targets, on top of FileSet's.
    set_status_up = FileSet.set_status_up + [
        'assay_term',
        'auxiliary_sets'
    ]
    set_status_down = FileSet.set_status_down + []

    @calculated_property(
        condition='multiome_size',
        schema={
            'title': 'Related Multiome Datasets',
            'description': 'Related datasets included in the multiome experiment this measurement set is a part of.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Related Multiome Dataset',
                'description': 'Related dataset included in the multiome experiment this measurement set is a part of.',
                'type': 'string',
                'linkTo': 'MeasurementSet'
            },
            'notSubmittable': True,
        }
    )
    def related_multiome_datasets(self, request, samples=None):
        """Other measurement sets that share a sample with this one
        (i.e. the sibling datasets of a multiome experiment).
        """
        object_id = self.jsonld_id(request)
        if samples:
            related_datasets = []
            for sample in samples:
                sample_object = request.embed(sample, '@@object')
                if sample_object.get('file_sets'):
                    for file_set_id in sample_object.get('file_sets'):
                        # Keep measurement sets other than this one, without duplicates.
                        if '/measurement-sets/' == file_set_id[:18] and \
                            object_id != file_set_id and \
                                file_set_id not in related_datasets:
                            related_datasets.append(file_set_id)
            return related_datasets

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    # NOTE(review): mutable default `construct_library_sets=[]` — never
    # mutated here, but a None default would be safer; TODO confirm/clean up.
    def summary(self, request, assay_term, preferred_assay_title=None, samples=None, control_type=None, targeted_genes=None, construct_library_sets=[]):
        """Assemble the measurement set summary:
        [control][modalities] assay[ targeting genes][ construct library phrase].
        """
        assay = request.embed(assay_term)['term_name']
        modality_set = set()
        cls_set = set()
        cls_type_set = set()
        control_phrase = ''
        cls_phrase = ''
        modality_phrase = ''
        assay_phrase = ''
        target_phrase = ''

        # Gather the modification modalities of all samples.
        for sample in samples:
            sample_object = request.embed(sample, '@@object')
            if sample_object.get('modifications'):
                for modification in sample_object.get('modifications'):
                    modality = request.embed(modification).get('modality', '')
                    if modality:
                        modality_set.add(modality)

        # Gather construct library set types and summaries.
        if construct_library_sets:
            for construct_library_set in construct_library_sets:
                construct_library_set_object = request.embed(
                    construct_library_set, '@@object_with_select_calculated_properties?field=summary')
                cls_type_set.add(construct_library_set_object['file_set_type'])
                cls_set.add(construct_library_set_object['summary'])

        # NOTE(review): when preferred_assay_title is None (its default),
        # `assay` becomes None here and the later `in assay` membership
        # checks would raise TypeError — presumably preferred_assay_title
        # is always set in practice; TODO confirm.
        if preferred_assay_title in ['10x multiome', '10x multiome with MULTI-seq', 'SHARE-seq']:
            assay = f'{assay} ({preferred_assay_title})'
        else:
            assay = preferred_assay_title

        if targeted_genes:
            # Special case for CRISPR screens using flow cytometry
            if request.embed(assay_term)['term_id'] == 'OBI:0003661':
                target_phrase = f' sorted on the expression of'
            else:
                target_phrase = f' targeting'
            # List the gene symbols for 5 or fewer genes; otherwise a count.
            if len(targeted_genes) > 5:
                target_phrase = f'{target_phrase} {len(targeted_genes)} genes'
            elif len(targeted_genes) <= 5:
                genes = []
                for targeted_gene in targeted_genes:
                    gene_object = request.embed(targeted_gene, '@@object?skip_calculated=true')
                    gene_name = (gene_object.get('symbol'))
                    genes.append(gene_name)
                genes = sorted(genes)
                target_phrase = f'{target_phrase} {", ".join(genes)}'

        if control_type:
            control_phrase = f'{control_type} '
        # Special case for Y2H assays if control_type is not specified.
        if request.embed(assay_term)['term_id'] == 'OBI:0000288' and control_type is None:
            control_phrase = 'post-selection '

        # A guide library implies a CRISPR assay.
        if 'guide library' in cls_type_set:
            if 'CRISPR' not in assay:
                assay = f'CRISPR {assay}'

        if len(modality_set) > 1:
            modality_set = ', '.join(modality_set)
            if 'CRISPR' in assay:
                assay_phrase = assay.replace('CRISPR', f'CRISPR {modality_set}')
            else:
                modality_phrase = f'{modality_set} '
                assay_phrase = f'{assay}'
            # NOTE(review): this unconditionally overwrites both branch
            # results above (discarding the CRISPR modality insertion) and
            # prepends a space — looks unintended; TODO confirm.
            assay_phrase = f' {assay}'
        # NOTE(review): modality_set was rebound to a joined string in the
        # branch above, so this re-tests the *string* length, not the
        # number of modalities; TODO confirm intent.
        if len(modality_set) == 1:
            modality_set = ''.join(modality_set)
            if 'CRISPR' in assay:
                assay_phrase = assay.replace('CRISPR', f'CRISPR {modality_set}')
            else:
                modality_phrase = f'{modality_set} '
                assay_phrase = f'{assay}'
        if len(modality_set) == 0:
            assay_phrase = f'{assay}'

        if len(cls_set) > 0:
            cls_phrase = f' {get_cls_phrase(cls_set)}'

        # Concatenate the non-empty phrases in a fixed order.
        sentence = ''
        sentence_parts = [
            control_phrase,
            modality_phrase,
            assay_phrase,
            target_phrase,
            cls_phrase,
        ]
        for phrase in sentence_parts:
            if phrase != '':
                sentence += phrase
        return sentence

    @calculated_property(
        condition='samples',
        schema={
            'title': 'Donors',
            'description': 'The donors of the samples associated with this measurement set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Donor',
                'description': 'Donor of a sample associated with this measurement set.',
                'type': 'string',
                'linkTo': 'Donor'
            },
            'notSubmittable': True,
        }
    )
    def donors(self, request, samples=None):
        """Unique donors across the samples of this measurement set."""
        return get_donors_from_samples(request, samples)

    @calculated_property(
        schema={
            'title': 'Externally Hosted',
            'type': 'boolean',
            'notSubmittable': True,
        }
    )
    def externally_hosted(self, request, files=None):
        """True if any file in this set is externally hosted."""
        externally_hosted_value = False
        if files:
            for current_file_path in files:
                file_object = request.embed(current_file_path, '@@object?skip_calculated=true')
                if file_object.get('externally_hosted'):
                    externally_hosted_value = True
        return externally_hosted_value
1✔
1221

1222

1223
@collection(
    name='model-sets',
    unique_key='accession',
    properties={
        'title': 'Model Sets',
        'description': 'Listing of model sets',
    }
)
class ModelSet(FileSet):
    """File set grouping the files and metadata of a predictive model."""
    item_type = 'model_set'
    schema = load_schema('igvfd:schemas/model_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('input_file_sets', include=['@id', 'accession', 'aliases', 'status']),
        Path('assessed_genes', include=['@id', 'geneid', 'symbol', 'name', 'synonyms', 'status']),
        Path('software_versions.software', include=['@id', 'summary', 'title', 'source_url', 'download_id', 'status'])
    ]
    audit_inherit = FileSet.audit_inherit
    # Status propagation: releasing a model set also releases its
    # calculated software_versions.
    set_status_up = FileSet.set_status_up + [
        'software_versions'
    ]
    set_status_down = FileSet.set_status_down + []

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, model_name, model_version, prediction_objects, assessed_genes=None):
        """Build a one-line summary.

        Shape: "<model_name> <model_version> <file_set_type>
        [for <assessed gene phrase>] predicting <prediction_objects...>".
        Empty fragments are dropped by filter(None, ...).
        """
        assessed_gene_phrase = get_assessed_gene_phrase(request, assessed_genes)
        return ' '.join(filter(None, [
            model_name,
            model_version,
            file_set_type,
            f'for {assessed_gene_phrase}' if assessed_genes else '',
            'predicting',
            ', '.join(prediction_objects)
        ]))

    @calculated_property(
        schema={
            'title': 'Externally Hosted',
            'type': 'boolean',
            'notSubmittable': True,
        }
    )
    def externally_hosted(self, request, files=None):
        """Return True if any file in this set is externally hosted.

        Short-circuits on the first hit instead of embedding every
        remaining file, which the previous version did needlessly.
        """
        for current_file_path in files or []:
            file_object = request.embed(current_file_path, '@@object?skip_calculated=true')
            if file_object.get('externally_hosted'):
                return True
        return False

    @calculated_property(
        schema={
            'title': 'Software Versions',
            'description': 'The software versions used to produce this predictive model.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Software Version',
                'description': 'A software version used to produce this predictive model.',
                'type': 'string',
                'linkTo': 'SoftwareVersion',
            },
            'notSubmittable': True
        }
    )
    def software_versions(self, request, files=None):
        """Collect the unique software versions behind this model's files.

        Only model files (paths under /model-files/) are inspected; each
        file's analysis_step_version contributes its software_versions.
        Returns None (property omitted) when nothing is found, matching
        the calculated-property convention used elsewhere in this file.
        """
        software_versions = []
        if files:
            for file in files:
                if file.startswith('/model-files/'):
                    file_object = request.embed(file, '@@object?skip_calculated=true')
                    analysis_step_version = file_object.get('analysis_step_version', '')
                    if analysis_step_version:
                        analysis_step_version_object = request.embed(
                            analysis_step_version, '@@object?skip_calculated=true')
                        software_versions.extend(
                            analysis_step_version_object.get('software_versions', []))
        if software_versions:
            return list(set(software_versions))
1311

1312

1313
@collection(
    name='auxiliary-sets',
    unique_key='accession',
    properties={
        'title': 'Auxiliary Sets',
        'description': 'Listing of auxiliary sets',
    })
class AuxiliarySet(FileSet):
    """File set holding data auxiliary to one or more measurement sets."""
    item_type = 'auxiliary_set'
    schema = load_schema('igvfd:schemas/auxiliary_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('measurement_sets', include=['@id', 'accession', 'aliases', 'preferred_assay_title', 'status']),
    ]
    audit_inherit = FileSet.audit_inherit
    # Reverse link: MeasurementSet.auxiliary_sets points back at this item.
    rev = FileSet.rev | {'measurement_sets': ('MeasurementSet', 'auxiliary_sets')}
    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []

    @calculated_property(schema={
        'title': 'Measurement Sets',
        'description': 'The measurement sets that link to this auxiliary set.',
        'type': 'array',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'Measurement Set',
            'type': ['string', 'object'],
            'linkFrom': 'MeasurementSet.auxiliary_sets',
        },
        'notSubmittable': True
    })
    def measurement_sets(self, request, measurement_sets):
        """Measurement sets linking here, filtered to visible statuses."""
        return paths_filtered_by_status(request, measurement_sets)

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, measurement_sets=None):
        """Summarize as "<type>" alone or "<type> for <measurement set summaries>".

        Summaries are deduplicated via a set comprehension and sorted for a
        deterministic order (replaces a redundant sorted(list(set([...])))).
        """
        if not measurement_sets:
            return file_set_type
        measurement_sets_summaries = sorted({
            request.embed(measurement_set, '@@object_with_select_calculated_properties?field=summary').get('summary')
            for measurement_set in measurement_sets
            if measurement_set
        })
        return f'{file_set_type} for {", ".join(measurement_sets_summaries)}'

    @calculated_property(
        condition='samples',
        schema={
            'title': 'Donors',
            'description': 'The donors of the samples associated with this auxiliary set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Donor',
                'description': 'Donor of a sample associated with this auxiliary set.',
                'type': 'string',
                'linkTo': 'Donor'
            },
            'notSubmittable': True,
        }
    )
    def donors(self, request, samples=None):
        """Deduplicated donors aggregated from all associated samples."""
        return get_donors_from_samples(request, samples)
1382

1383

1384
@collection(
    name='prediction-sets',
    unique_key='accession',
    properties={
        'title': 'Prediction Sets',
        'description': 'Listing of prediction sets',
    })
class PredictionSet(FileSet):
    """File set for computational predictions over genes or loci."""
    item_type = 'prediction_set'
    schema = load_schema('igvfd:schemas/prediction_set.json')
    embedded_with_frame = FileSet.embedded_with_frame + [
        Path('samples.construct_library_sets', include=['@id', 'accession', 'summary', 'status']),
        Path('large_scale_gene_list', include=['@id', 'accession', 'aliases', 'status']),
        Path('large_scale_loci_list', include=['@id', 'accession', 'aliases', 'status']),
        Path('small_scale_gene_list', include=['@id', 'geneid', 'symbol', 'name', 'synonyms', 'status']),
        Path('assessed_genes', include=['@id', 'geneid', 'symbol', 'name', 'synonyms', 'status']),
    ]
    audit_inherit = FileSet.audit_inherit
    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'description': 'A summary of the prediction set.',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, assessed_genes=None, scope=None):
        """Compose "<type> prediction[ on scope of <scope>][ for <genes>]".

        The gene phrase is only appended when assessed_genes is provided;
        empty fragments are dropped before joining.
        """
        scope_phrase = f' on scope of {scope}' if scope else ''
        gene_phrase = get_assessed_gene_phrase(request, assessed_genes)
        parts = [file_set_type, f'prediction{scope_phrase}']
        if assessed_genes:
            parts.append(f'for {gene_phrase}')
        return ' '.join(filter(None, parts))
1426

1427

1428
@collection(
    name='construct-library-sets',
    unique_key='accession',
    properties={
        'title': 'Construct Library Sets',
        'description': 'Listing of construct library sets',
    })
class ConstructLibrarySet(FileSet):
    # File set describing a library of constructs (guides, expression
    # vectors, tiles, targetons, ...) applied to samples.
    item_type = 'construct_library_set'
    schema = load_schema('igvfd:schemas/construct_library_set.json')
    # NOTE: unlike the sibling file-set classes, this frame list is defined
    # from scratch rather than extending FileSet.embedded_with_frame.
    embedded_with_frame = [
        Path('award', include=['@id', 'component']),
        Path('lab', include=['@id', 'title']),
        Path('submitted_by', include=['@id', 'title']),
        Path('files', include=['@id', 'accession', 'aliases',
             'content_type', 'href', 'file_format', 'upload_status', 'status']),
        Path('integrated_content_files', include=['@id', 'accession',
             'aliases', 'content_type', 'file_format', 'upload_status', 'status']),
        Path('control_for', include=['@id', 'accession', 'aliases', 'status']),
        Path('associated_phenotypes', include=['@id', 'term_id', 'term_name', 'status']),
        Path('small_scale_gene_list', include=['@id', 'geneid', 'symbol', 'name', 'synonyms', 'status']),
        Path('applied_to_samples', include=['@id', '@type', 'accession',
             'aliases', 'classifications', 'disease_terms', 'donors', 'sample_terms', 'targeted_sample_term', 'status', 'summary', 'modifications', 'treatments', 'nucleic_acid_delivery']),
        Path('applied_to_samples.donors', include=['@id', 'taxa', 'status']),
        Path('applied_to_samples.disease_terms', include=['@id', 'term_name', 'status']),
        Path('applied_to_samples.sample_terms', include=['@id', 'term_name', 'status']),
        Path('applied_to_samples.targeted_sample_term', include=['@id', 'term_name', 'status']),
        Path('applied_to_samples.modifications', include=['@id', 'modality', 'summary', 'status']),
        Path('applied_to_samples.treatments', include=['@id', 'treatment_term_name', 'summary', 'status']),
        Path('large_scale_gene_list', include=['@id', 'accession', 'aliases', 'status']),
        Path('large_scale_loci_list', include=['@id', 'accession', 'aliases', 'status']),
        Path('orf_list', include=['@id', 'orf_id', 'genes', 'aliases', 'status']),
        Path('orf_list.genes', include=['@id', 'symbol', 'status']),
        Path('publications', include=['@id', 'publication_identifiers', 'status']),
    ]
    audit_inherit = [
        'award',
        'lab',
        'files',
        'documents',
        'integrated_content_files'
    ]

    # Reverse link: Sample.construct_library_sets points back at this item.
    rev = FileSet.rev | {'applied_to_samples': ('Sample', 'construct_library_sets')}

    set_status_up = FileSet.set_status_up + []
    set_status_down = FileSet.set_status_down + []

    @calculated_property(schema={
        'title': 'Applied to Samples',
        'description': 'The samples that link to this construct library set.',
        'type': 'array',
        'minItems': 1,
        'uniqueItems': True,
        'items': {
            'title': 'Applied to Sample',
            'type': ['string', 'object'],
            'linkFrom': 'Sample.construct_library_sets',
        },
        'notSubmittable': True
    })
    def applied_to_samples(self, request, applied_to_samples):
        # Samples linking here via the rev link, filtered to visible statuses.
        return paths_filtered_by_status(request, applied_to_samples)

    @calculated_property(
        define=True,
        schema={
            'title': 'File Sets',
            'description': 'The file sets that used this construct library set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'File Set',
                'type': ['string', 'object'],
                'linkTo': 'FileSet'
            },
            'notSubmittable': True
        })
    def file_sets(self, request, applied_to_samples=[]):
        # Union of the file_sets of every sample this library was applied to.
        # NOTE(review): mutable default argument; harmless here because it is
        # only iterated, never mutated.
        linked_file_sets = set()
        for sample in applied_to_samples:
            sample_object = request.embed(sample, '@@object_with_select_calculated_properties?field=file_sets')
            for file_set in sample_object.get('file_sets', []):
                linked_file_sets.add(file_set)
        # Implicitly returns None when empty, so the property is omitted.
        if linked_file_sets:
            return list(linked_file_sets)

    @calculated_property(
        condition='file_sets',
        define=True,
        schema={
            'title': 'Assay Titles',
            'description': 'The assay titles of the file sets that used this construct library set.',
            'type': 'array',
            'minItems': 1,
            'uniqueItems': True,
            'items': {
                'title': 'Assay Title',
                'type': 'string'
            },
            'notSubmittable': True
        })
    def assay_titles(self, request, file_sets=[]):
        # Preferred assay titles of the measurement sets (only) among the
        # linked file sets. Returns an empty list when none match.
        assay_titles = set()
        for file_set in file_sets:
            if file_set.startswith('/measurement-sets/'):
                file_set_object = request.embed(file_set, '@@object?skip_calculated=true')
                preferred_assay_title = file_set_object.get('preferred_assay_title')
                if preferred_assay_title:
                    assay_titles.add(preferred_assay_title)
        return list(assay_titles)

    @calculated_property(
        schema={
            'title': 'Summary',
            'type': 'string',
            'notSubmittable': True,
        }
    )
    def summary(self, request, file_set_type, scope, selection_criteria, small_scale_gene_list=None, large_scale_gene_list=None, guide_type=None,
                small_scale_loci_list=None, large_scale_loci_list=None, exon=None, tile=None, orf_list=None, associated_phenotypes=None,
                control_type=None, targeton=None, assay_titles=[], integrated_content_files=[]):
        """Build the human-readable summary for a construct library set.

        The summary is assembled from: the library type (specialized for
        guide libraries), a target phrase derived from ``scope``, an
        optional phenotype phrase, an optional 1000 Genomes donor-pool
        phrase (STARR-seq only), and the selection criteria. Expression
        vector libraries use an "of <target>" form; everything else uses
        "targeting <criteria> in <target>".

        NOTE(review): ``assay_titles=[]`` and ``integrated_content_files=[]``
        are mutable defaults; both are only read, never mutated, so sharing
        is harmless here.
        """
        library_type = file_set_type
        target_phrase = ''
        pheno_terms = []
        pheno_phrase = ''
        preposition = ''
        pool_phrase = ''
        # Work on a copy so removing 'genes' below never mutates the
        # caller's selection_criteria list.
        criteria = []
        criteria = criteria + selection_criteria

        # Specialize the guide library label with the guide chemistry.
        if library_type == 'guide library':
            if guide_type == 'sgRNA':
                library_type = 'guide (sgRNA) library'
            if guide_type == 'pgRNA':
                library_type = 'guide (pgRNA) library'

        # Control libraries have a short fixed-form summary.
        if scope == 'control':
            return f'{control_type} {library_type}'
        # Each scope below fills in target_phrase; scopes are mutually
        # exclusive in practice (scope is a single enum value).
        if scope == 'loci':
            if small_scale_loci_list and len(small_scale_loci_list) > 1:
                target_phrase = f' {len(small_scale_loci_list)} genomic loci'
            elif large_scale_loci_list:
                target_phrase = f' many genomic loci'
            else:
                target_phrase = f' a genomic locus'
        if scope == 'genes':
            if small_scale_gene_list and len(small_scale_gene_list) > 1:
                target_phrase = f' {len(small_scale_gene_list)} genes'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                # Single gene: name it by its symbol.
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                gene_name = (gene_object.get('symbol'))
                target_phrase = f' {gene_name}'
            elif large_scale_gene_list:
                target_phrase = f' many genes'
        if scope == 'exon':
            if small_scale_gene_list and len(small_scale_gene_list) > 1:
                target_phrase = f' exon {exon} of multiple genes'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                gene_name = (gene_object.get('symbol'))
                target_phrase = f' exon {exon} of {gene_name}'
        if scope == 'interactors':
            if orf_list and len(orf_list) > 1:
                target_phrase = f' {len(orf_list)} open reading frames'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                # NOTE(review): this branch indexes orf_list[0] but only
                # checks small_scale_gene_list — assumes the schema
                # guarantees orf_list is present here; confirm.
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                orf_object = request.embed(orf_list[0], '@@object?skip_calculated=true')
                gene_name = (gene_object.get('symbol'))
                orf_id = (orf_object.get('orf_id'))
                target_phrase = f' open reading frame {orf_id} of {gene_name}'
        if scope == 'tile':
            # tile is a required subobject for this scope: id plus the
            # amino-acid start/end coordinates.
            tile_id = tile['tile_id']
            start = tile['tile_start']
            end = tile['tile_end']
            if small_scale_gene_list and len(small_scale_gene_list) > 1:
                target_phrase = f' tile {tile_id} of multiple genes'
            elif small_scale_gene_list and len(small_scale_gene_list) == 1:
                gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
                gene_name = (gene_object.get('symbol'))
                target_phrase = f' tile {tile_id} of {gene_name} (AA {start}-{end})'
        if scope == 'genome-wide':
            target_phrase = ' genome-wide'
        if scope == 'targeton':
            # NOTE(review): assumes small_scale_gene_list is non-empty for
            # targeton scope — confirm against the schema.
            gene_object = request.embed(small_scale_gene_list[0], '@@object?skip_calculated=true')
            gene_name = gene_object.get('symbol', '')
            target_phrase = f' {targeton} of {gene_name}'

        # Phenotype phrase: name one or two phenotypes; just count them
        # beyond that.
        if associated_phenotypes:
            for pheno in associated_phenotypes:
                pheno_object = request.embed(pheno, '@@object?skip_calculated=true')
                term_name = (pheno_object.get('term_name'))
                pheno_terms.append(term_name)
            if len(pheno_terms) in [1, 2]:
                phenos = ' and '.join(pheno_terms)
                pheno_phrase = f' associated with {phenos}'
            else:
                pheno_phrase = f' associated with {len(pheno_terms)} phenotypes'

        # STARR-seq only: collect 1000 Genomes (IGSR) donor IDs from the
        # donors of the file sets behind each integrated content file.
        if assay_titles and 'STARR-seq' in assay_titles:
            thousand_genomes_ids = set()
            for integrated_content_file in integrated_content_files:
                integrated_content_file_object = request.embed(integrated_content_file, '@@object?skip_calculated=true')
                file_set_object = request.embed(
                    integrated_content_file_object['file_set'], '@@object_with_select_calculated_properties?field=donors')
                donors = file_set_object.get('donors', [])
                for donor in donors:
                    donor_object = request.embed(donor, '@@object?skip_calculated=true')
                    dbxrefs = donor_object.get('dbxrefs', [])
                    for dbxref in dbxrefs:
                        # dbxrefs look like 'IGSR:<id>'; keep the id part.
                        if dbxref.startswith('IGSR'):
                            thousand_genomes_id = dbxref.split(':')[1]
                            thousand_genomes_ids.add(thousand_genomes_id)
            if thousand_genomes_ids:
                thousand_genomes_ids = ', '.join(sorted(list(thousand_genomes_ids)))
                pool_phrase = f' pooled from 1000 Genomes donors: {thousand_genomes_ids}'

        if file_set_type == 'expression vector library':
            # 'genes' is implied by the "of <target>" form, so drop it from
            # the parenthesized criteria.
            if 'genes' in criteria:
                criteria.remove('genes')
            selections = ', '.join(criteria)
            if selections:
                selections = f' ({selections})'
            preposition = ' of'
            return f'{library_type}{preposition}{target_phrase}{selections}{pheno_phrase}'
        else:
            selections = ', '.join(criteria)
            # 'genome-wide' already reads as a location, so no preposition.
            if scope == 'genome-wide':
                preposition = ''
            else:
                preposition = ' in'
            return f'{library_type} targeting {selections}{preposition}{target_phrase}{pheno_phrase}{pool_phrase}'
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc