Coveralls logo
Coveralls logo
  • Home
  • Features
  • Pricing
  • Docs
  • Sign In

askomics / askomics / 746

28 Jun 2019 - 14:02 coverage increased (+0.003%) to 80.28%
746

Pull #354

travis-ci

9181eb84f9c35729a3bad740fb7f9d93?size=18&default=identiconweb-flow
don't logout user after 10h
Pull Request #354: some fixes

18 of 24 new or added lines in 4 files covered. (75.0%)

1 existing line in 1 file now uncovered.

4653 of 5796 relevant lines covered (80.28%)

1.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.75
/askomics/libaskomics/source_file/SourceFileGff.py
1
#!/usr/bin/python3
2
# -*- coding: utf-8 -*-
3
"""
2×
4
Classes to import data from gff3 source files
5
"""
6

7

8
import re,os
2×
9
import datetime
2×
10
from BCBio.GFF import GFFExaminer
2×
11
from BCBio import GFF
2×
12

13
from askomics.libaskomics.source_file.SourceFile import SourceFile
2×
14
from askomics.libaskomics.utils import rreplace
2×
15

16
class SourceFileGff(SourceFile):
    """
    Class representing a Gff3 Source file

    The file is read with BCBio's GFF parser and converted to turtle
    strings.  While the turtle is generated, the abstraction (entity types
    and their attributes) and the domain knowledge (taxon / reference /
    strand categories) are collected so that ``get_abstraction`` and
    ``get_domain_knowledge`` can be called once ``get_turtle`` has been
    fully consumed.

    NOTE(review): ``self.log``, ``self.uri``, ``self.path``,
    ``encode_to_rdf_uri`` and ``decode_to_rdf_uri`` are not defined in this
    class; they are presumably provided by ``SourceFile`` -- confirm.
    """

    def __init__(self, settings, session, path, uri_set=None):
        """
        Initialise the GFF source file

        :param settings: forwarded unchanged to ``SourceFile.__init__``
        :param session: forwarded unchanged to ``SourceFile.__init__``
        :param path: forwarded unchanged to ``SourceFile.__init__``
        :param uri_set: optional collection forwarded to
            ``SourceFile.__init__``; when non-empty, ``self.uri[0]`` is used
            as prefix for the generated URIs
        """
        SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

        self.type = 'gff'

        # entity type URI -> {'pos_attr': [...], 'normal_attr': [...]}
        self.abstraction_dict = {}

        # entity type URI -> {'category': {category name: [URIs]}}
        self.domain_knowledge_dict = {}

        self.pos_attr_list = [
            'position_taxon', 'position_ref', 'position_start', 'position_end',
            'position_strand'
        ]

        self.categories_list = ['position_taxon', 'position_ref', 'position_strand']

        self.taxon = ''

        self.entities = []

        self.timestamp = datetime.datetime.now().isoformat()

        # URI -> human readable label, filled while the turtle is generated
        self.getLabelFromUri = {}

        if uri_set:
            # presumably self.uri is filled by SourceFile.__init__ from
            # uri_set -- TODO confirm
            self.prefix = self.uri[0]
        else:
            self.prefix = None

    def set_taxon(self, taxon):
        """
        Set the taxon associated with every feature of the file

        :param taxon: taxon name (an empty string means "no taxon")
        """
        self.taxon = taxon

    def set_entities(self, entities):
        """
        Set the GFF feature types that ``get_turtle`` has to convert

        :param entities: list of GFF feature types
        """
        self.entities = entities

    def get_entities(self):
        """
        get all the entities present in a gff file

        :return: The list of all the entities
        :rtype: List
        """
        exam = GFFExaminer()
        # 'with' guarantees the file is closed even if the examiner raises
        with open(self.path, encoding="utf-8", errors="ignore") as handle:
            gff_type = exam.available_limits(handle)['gff_type']
            # only the first item of each 'gff_type' key is kept
            return [ent[0] for ent in gff_type]

    def get_turtle(self):
        """
        Get turtle string for a gff file

        This is a generator: one turtle string is yielded per feature.
        Features whose ``Parent`` / ``Derives_from`` target has not been
        seen yet are kept aside and yielded after the whole file has been
        parsed, once the type of their target may be known.

        As a side effect ``self.abstraction_dict``,
        ``self.domain_knowledge_dict`` and ``self.getLabelFromUri`` are
        filled.

        :return: turtle strings, one per feature
        :rtype: Generator
        """
        self.log.debug(self.path)

        # To suffix all biological element without ID and try to have unique ID
        suffix_uri = os.path.splitext(os.path.basename(self.path))[0]

        limit = dict(gff_type=self.entities)

        # Keep the type of each entity (by ID URI) to be able to build the
        # abstraction for the 'Parent' relation
        type_entities = {}
        # per entity type counter, used to name features without ID
        icount = {}
        # string form of the attributes of the features without ID that
        # have already been emitted
        seen_anonymous = set()
        # [id_entity, type_entity, attribute_dict] of postponed features
        deferred = []

        taxon_label = self.taxon.strip()
        taxon_entity = ':unknown'
        if self.taxon != '':
            taxon_entity = self.encode_to_rdf_uri(taxon_label, ':')

        self.getLabelFromUri[taxon_entity] = taxon_label
        self.getLabelFromUri[':plus'] = 'plus'
        self.getLabelFromUri[':minus'] = 'minus'
        self.getLabelFromUri[':none'] = ''

        # features are indexed by blocks of 'blockbase' positions
        blockbase = 10000

        # The context manager closes the file even when the consumer stops
        # iterating before the end or when the parser raises.
        with open(self.path, encoding="utf-8", errors="ignore") as handle:
            for rec in GFF.parse(handle, limit_info=limit, target_lines=1):
                # The reference is encoded with the ':' prefix and not with
                # self.prefix, so that it is shared with the references of
                # the other taxons
                ref_entity = self.encode_to_rdf_uri(str(rec.id), prefix=':')
                if ref_entity not in self.getLabelFromUri:
                    self.getLabelFromUri[ref_entity] = str(rec.id)

                for feat in rec.features:
                    type_entity = self.encode_to_rdf_uri(feat.type, prefix=self.prefix)
                    type_entity_label = feat.type
                    if type_entity not in self.getLabelFromUri:
                        self.getLabelFromUri[type_entity] = str(feat.type)

                    build_entity_id = False

                    if feat.id != '':
                        id_entity = self.encode_to_rdf_uri(feat.id, prefix=self.prefix)
                        self.getLabelFromUri[id_entity] = str(feat.id)
                    else:
                        # if there is no ID field, build one from the taxon,
                        # the file name, the entity type and a counter
                        icount[type_entity] = icount.get(type_entity, 0) + 1
                        count_label = str(icount[type_entity])

                        raw_id = suffix_uri + "_" + self.getLabelFromUri[type_entity] + "_" + count_label
                        if self.taxon != '':
                            raw_id = self.taxon.strip() + "_" + raw_id

                        id_entity = self.encode_to_rdf_uri(raw_id, prefix=self.prefix)

                        build_entity_id = True
                        self.getLabelFromUri[id_entity] = str(feat.type) + "_" + count_label

                    start_entity = int(feat.location.start)
                    end_entity = int(feat.location.end)

                    if feat.location.strand == 1:
                        strand_entity = ':plus'
                        faldo_strand = "faldo:ForwardStrandPosition"
                    elif feat.location.strand == -1:
                        strand_entity = ':minus'
                        faldo_strand = "faldo:ReverseStrandPosition"
                    else:
                        strand_entity = ':none'
                        faldo_strand = "faldo:BothStrandPosition"

                    block_idxstart = start_entity // blockbase
                    block_idxend = end_entity // blockbase
                    block_range = range(block_idxstart, block_idxend + 1)
                    list_slice_ref = [
                        self.encode_to_rdf_uri(":" + str(rec.id) + "_" + str(sliceb))
                        for sliceb in block_range
                    ]
                    list_slice = [str(sliceb) for sliceb in block_range]

                    attribute_dict = {
                        'rdf:type': [type_entity],
                        'askomics:position_taxon': [taxon_entity],
                        'askomics:position_ref': [ref_entity],
                        'askomics:position_start': [start_entity],
                        'askomics:position_end': [end_entity],
                        'askomics:position_strand': [strand_entity],
                        'askomics:blockstart': [str(block_idxstart * blockbase)],
                        'askomics:blockend': [str(block_idxend * blockbase)],
                        'askomics:IsIncludeInRef': list_slice_ref,
                        'askomics:IsIncludeIn': list_slice,
                        'faldo:location': ["[ a faldo:Region ;\n" +
                                           "    faldo:begin [ a faldo:ExactPosition;\n" +
                                           "                  a " + faldo_strand + ";\n" +
                                           "                  faldo:position " + str(start_entity) + ";\n" +
                                           "                  faldo:reference " + ref_entity + " ];\n" +
                                           "    faldo:end [ a faldo:ExactPosition;\n" +
                                           "                a " + faldo_strand + ";\n" +
                                           "                  faldo:position " + str(end_entity) + ";\n" +
                                           "                  faldo:reference " + ref_entity + " ]" +
                                           "]"],
                        'rdfs:label': ['"' + self.decode_to_rdf_uri(self.getLabelFromUri[id_entity], prefix=self.prefix) + '"^^xsd:string']
                    }

                    # Abstraction
                    if type_entity not in self.abstraction_dict:
                        self.abstraction_dict[type_entity] = {'pos_attr': self.pos_attr_list, 'normal_attr': []}
                    normal_attr = self.abstraction_dict[type_entity]['normal_attr']

                    # Domain knowledge --------------------------------------
                    if type_entity not in self.domain_knowledge_dict:
                        self.domain_knowledge_dict[type_entity] = {'category': {}}
                    categories = self.domain_knowledge_dict[type_entity]['category']

                    if categories == {}:
                        for category in self.categories_list:
                            categories[category] = []

                    # Strand
                    if strand_entity not in categories['position_strand']:
                        categories['position_strand'].append(strand_entity)
                    # taxon
                    if taxon_entity not in categories['position_taxon']:
                        categories['position_taxon'].append(taxon_entity)
                    # ref
                    if ref_entity not in categories['position_ref']:
                        categories['position_ref'].append(ref_entity)

                    # -------------------------------------------------------
                    build_later = False
                    for qualifier_key, qualifier_value in feat.qualifiers.items():
                        keyuri = self.encode_to_rdf_uri(qualifier_key, prefix=self.prefix)

                        # an empty list is never written by get_content_ttl
                        attribute_dict[keyuri] = []

                        for val in qualifier_value:
                            valuri = self.encode_to_rdf_uri(val, prefix=self.prefix)

                            if qualifier_key == 'ID':
                                if (valuri not in type_entities) and type_entity != '':
                                    type_entities[valuri] = type_entity_label

                                attribute_dict['rdfs:label'] = ['"' + str(val) + '"^^xsd:string']

                            elif qualifier_key in ('Parent', 'Derives_from'):
                                if valuri not in type_entities:
                                    # Target not seen yet: keep every such
                                    # value under the generic key (the list
                                    # must not be reset between values) and
                                    # resolve it once the file is parsed.
                                    build_later = True
                                    attribute_dict[keyuri].append(valuri)
                                else:
                                    self._add_typed_relation(
                                        qualifier_key, valuri, type_entities[valuri],
                                        attribute_dict, normal_attr)
                            else:
                                attribute_dict[keyuri].append('"' + val + '"^^xsd:string')
                                # store normal attr in abstraction
                                if keyuri not in normal_attr:
                                    normal_attr.append(keyuri)

                    if build_entity_id:
                        # NOTE(review): the label holds the per-type counter,
                        # so two features without ID presumably never
                        # compare equal here -- confirm the intent.
                        fingerprint = str(attribute_dict)
                        if fingerprint in seen_anonymous:
                            continue
                        seen_anonymous.add(fingerprint)

                    if not build_later:
                        yield self.get_content_ttl({id_entity: attribute_dict})
                    else:
                        # the entity type is stored with the postponed
                        # feature: the loop variable will have moved on
                        deferred.append([id_entity, type_entity, attribute_dict])

        for id_entity, type_entity, attribute_dict in deferred:
            self._resolve_deferred_relations(type_entity, attribute_dict, type_entities)
            # each postponed feature is emitted exactly once
            yield self.get_content_ttl({id_entity: attribute_dict})

    def _add_typed_relation(self, qualifier_key, valuri, target_label, attribute_dict, normal_attr):
        """
        Store a Parent / Derives_from relation whose target type is known

        :param qualifier_key: 'Parent' or 'Derives_from'
        :param valuri: URI of the target entity
        :param target_label: type label of the target entity
        :param attribute_dict: attributes of the feature, updated in place
        :param normal_attr: 'normal_attr' list of the abstraction of the
            feature type, updated in place
        """
        relation_uri = self.encode_to_rdf_uri(qualifier_key + "_" + target_label, prefix=self.prefix)
        attribute_dict.setdefault(relation_uri, []).append(str(valuri))

        # Store the parent relation in abstraction
        dom_and_range = {relation_uri: self.encode_to_rdf_uri(target_label, prefix=self.prefix)}
        if dom_and_range not in normal_attr:
            normal_attr.append(dom_and_range)

    def _resolve_deferred_relations(self, type_entity, attribute_dict, type_entities):
        """
        Type the Parent / Derives_from relations of a postponed feature

        Targets that are still unknown stay under the generic qualifier key
        and a warning is logged for each of them.

        :param type_entity: type URI of the postponed feature
        :param attribute_dict: attributes of the feature, updated in place
        :param type_entities: ID URI -> type label of every entity seen
        """
        normal_attr = self.abstraction_dict[type_entity]['normal_attr']

        for qualifier_key in ('Parent', 'Derives_from'):
            # get_turtle stores the pending values under the encoded key
            generic_uri = self.encode_to_rdf_uri(qualifier_key, prefix=self.prefix)
            pending = attribute_dict.get(generic_uri)
            if not pending:
                continue

            unresolved = []
            for valuri in pending:
                if valuri not in type_entities:
                    self.log.warning("Unknown "+qualifier_key+" ID ["+self.decode_to_rdf_uri(valuri,prefix=self.prefix)+"]. Certainly because this element have not been selected.")
                    unresolved.append(valuri)
                    continue
                self._add_typed_relation(
                    qualifier_key, valuri, type_entities[valuri],
                    attribute_dict, normal_attr)

            attribute_dict[generic_uri] = unresolved

    def get_content_ttl(self, entity):
        """
        Get the ttl string for an entity

        :param entity: dict mapping an entity URI to its attribute dict
            (predicate -> list of values); empty lists are not written
        :return: the turtle string of every entity of the dict (an empty
            string when the dict is empty)
        :rtype: str
        """
        chunks = []
        for id_entity, attribute_dict in entity.items():
            indent = len(id_entity) * ' ' + ' '
            triples = []
            for key, attr in attribute_dict.items():
                for value in attr:
                    # the first predicate follows the subject on its line
                    lead = indent if triples else ' '
                    triples.append(lead + str(key) + ' ' + str(value) + ' ;\n')

            ttl = id_entity + ''.join(triples) + '\n'
            # the last ';' of the entity becomes the final '.'
            chunks.append(rreplace(ttl, ';', '.', 1))

        return ''.join(chunks)

    def get_abstraction(self):
        """
        Get Abstraction (turtle) of the GFF

        The abstraction is built from ``self.abstraction_dict``, which is
        filled by ``get_turtle``.

        :return: the abstraction
        :rtype: str
        """
        order_dict = {
            'Name': '2',
            'position_ref': '3',
            'position_start': '4',
            'position_end': '5',
            'position_strand': '6',
            'position_taxon': '7'
        }

        out = [
            '#################\n',
            '#  Abstraction  #\n',
            '#################\n\n',
            '\n',
            'rdfs:label rdf:type owl:DatatypeProperty .\n',
            'rdfs:label askomics:attribute "true"^^xsd:boolean .\n',
            'rdfs:label askomics:attributeOrder "1"^^xsd:decimal .\n',
            'rdfs:label rdfs:label "label" .\n',
            'rdfs:label rdfs:range xsd:string .\n',
            '\n',
        ]

        for entity, attribute_dict in self.abstraction_dict.items():
            indent = len(entity) * ' ' + ' '
            out.append(entity + ' ' + 'rdf:type owl:Class ;\n')
            out.append(indent + 'rdfs:label "' + self.decode_to_rdf_uri(entity, prefix=self.prefix) + '"^^xsd:string ;\n')
            out.append(indent + 'askomics:startPoint "true"^^xsd:boolean ;\n')
            out.append(indent + 'askomics:entity "true"^^xsd:boolean .\n\n')

            out.append('\n')
            out.append(indent + 'rdfs:label rdfs:domain ' + entity + ' .\n')
            out.append('\n')

            for type_attr, attr_list in attribute_dict.items():
                if type_attr == 'pos_attr':  # positionable attributes
                    for pos_attr in attr_list:
                        short_name = pos_attr.replace('position_', '')
                        if pos_attr in ('position_start', 'position_end'):
                            rdf_type = 'rdf:type owl:DatatypeProperty ;\n'
                            rdf_range = 'rdfs:range xsd:decimal .\n\n'
                        else:
                            # No taxon, don't write triple and continue loop
                            if pos_attr == 'position_taxon' and self.taxon == '':
                                continue
                            rdf_type = 'rdf:type owl:ObjectProperty ;\n'
                            rdf_range = 'rdfs:range ' + self.encode_to_rdf_uri('askomics:' + short_name + "Category") + ".\n\n"

                        pos_uri = self.encode_to_rdf_uri('askomics:' + pos_attr)
                        indent = len(pos_attr) * ' ' + '  '
                        out.append(pos_uri + ' askomics:attribute "true"^^xsd:boolean ;\n')
                        out.append(indent + rdf_type)
                        out.append(indent + 'rdfs:label "' + short_name + '"^^xsd:string ;\n')
                        out.append(indent + 'rdfs:domain ' + entity + ' ;\n')
                        out.append(indent + rdf_range)
                        out.append(pos_uri + ' askomics:attributeOrder "' + order_dict[pos_attr] + '"^^xsd:decimal .\n')
                else:  # other attributes
                    for attr in attr_list:
                        if isinstance(attr, dict):  # Parent relation
                            for key, value in attr.items():
                                indent = len(key) * ' ' + '  '
                                out.append(key + ' rdf:type owl:ObjectProperty ;\n')
                                out.append(indent + 'rdfs:label "' + self.decode_to_rdf_uri(key, prefix=self.prefix) + '"^^xsd:string ;\n')
                                out.append(indent + 'rdfs:domain ' + entity + " ;\n")
                                out.append(indent + 'rdfs:range ' + value + ' .\n\n')
                        else:  # normal attributes
                            label = self.decode_to_rdf_uri(attr, prefix=self.prefix)
                            indent = len(attr) * ' ' + '  '
                            out.append(attr + ' askomics:attribute "true"^^xsd:boolean ;\n')
                            out.append(indent + 'rdf:type owl:DatatypeProperty ;\n')
                            out.append(indent + 'rdfs:label "' + label + '"^^xsd:string ;\n')
                            out.append(indent + 'rdfs:domain ' + entity + " ;\n")
                            out.append(indent + 'rdfs:range xsd:string .\n\n')
                            # 'attr' is the URI stored by get_turtle, so the
                            # decoded label is compared as well.
                            # NOTE(review): assumes decode_to_rdf_uri gives
                            # back the raw attribute name -- confirm.
                            if attr == 'Name' or label == 'Name':
                                out.append(attr + ' askomics:attributeOrder "' + order_dict['Name'] + '"^^xsd:decimal .\n')

        return ''.join(out)

    def get_domain_knowledge(self):
        """
        Get Domain Knowledge (turtle) of the GFF

        The domain knowledge is built from ``self.domain_knowledge_dict``
        and ``self.getLabelFromUri``, which are filled by ``get_turtle``.

        :return: the domain knowledge
        :rtype: str
        """
        out = [
            '######################\n',
            '#  Domain knowledge  #\n',
            '######################\n\n',
        ]

        for entity, dk_dict in self.domain_knowledge_dict.items():
            # Positionable entity
            # NOTE(review): 'entity' is already a URI built by get_turtle and
            # is encoded again here, unlike in get_abstraction -- confirm.
            out.append(self.encode_to_rdf_uri(entity, prefix=self.prefix) + ' askomics:is_positionable "true"^^xsd:boolean .\n')
            out.append('askomics:is_positionable rdfs:label \'is_positionable\'^^xsd:string .\n')
            out.append('askomics:is_positionable rdf:type owl:ObjectProperty .\n\n')

            for category_dict in dk_dict.values():
                for category, cat_list in category_dict.items():
                    # dont write triple for taxon if user don't enter one
                    if category == 'position_taxon' and self.taxon == '':
                        continue
                    short_name = str(category.replace('position_', ''))
                    for cat in cat_list:
                        label = self.getLabelFromUri[cat]
                        # categories without label (':none') are not written
                        if label == '':
                            continue
                        cat_uri = str(cat)
                        indent = len(cat_uri) * ' ' + ' '
                        out.append(self.encode_to_rdf_uri('askomics:' + short_name + 'Category') + ' askomics:category ' + cat_uri + ' .\n')
                        out.append(cat_uri + ' rdf:type ' + self.encode_to_rdf_uri('askomics:' + short_name) + ' ;\n')
                        out.append(indent + 'rdfs:label "' + label + '"^^xsd:string .\n')

            out.append('\n')

        return ''.join(out)
Troubleshooting · Open an Issue · Sales · Support · ENTERPRISE · CAREERS · STATUS
BLOG · TWITTER · Legal & Privacy · Supported CI Services · What's a CI service? · Automated Testing

© 2022 Coveralls, Inc