• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

askomics / flaskomics / 6590008757

20 Oct 2023 03:58PM UTC coverage: 83.758% (+0.4%) from 83.31%
6590008757

push

github-actions

web-flow
Merge pull request #420 from askomics/dev

Release 4.5.0

633 of 633 new or added lines in 29 files covered. (100.0%)

6240 of 7450 relevant lines covered (83.76%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.07
/askomics/libaskomics/CsvFile.py
1
import csv
1✔
2
import re
1✔
3
import rdflib
1✔
4
import sys
1✔
5
import traceback
1✔
6
from dateutil import parser
1✔
7

8
from rdflib import BNode
1✔
9

10
from askomics.libaskomics.File import File
1✔
11
from askomics.libaskomics.OntologyManager import OntologyManager
1✔
12
from askomics.libaskomics.Utils import cached_property
1✔
13

14

15
class CsvFile(File):
1✔
16
    """CSV file
17

18
    Attributes
19
    ----------
20
    category_values : dict
21
        Category values
22
    columns_type : list
23
        Columns type
24
    header : list
25
        Header
26
    preview : list
27
        Preview
28
    public : bool
29
        Public
30
    """
31

32
    def __init__(self, app, session, file_info, host_url=None, external_endpoint=None, custom_uri=None, external_graph=None):
        """Initialise the CSV file with an empty header, preview and columns type

        Parameters
        ----------
        app : Flask
            Flask app
        session :
            AskOmics session
        file_info : dict
            file info
        host_url : None, optional
            AskOmics url
        external_endpoint : None, optional
            Forwarded unchanged to File.__init__
        custom_uri : None, optional
            Forwarded unchanged to File.__init__
        external_graph : None, optional
            Forwarded unchanged to File.__init__
        """
        File.__init__(
            self, app, session, file_info, host_url,
            external_endpoint=external_endpoint,
            custom_uri=custom_uri,
            external_graph=external_graph
        )

        # Number of rows kept for the preview: the "npreview" option of the
        # "askomics" settings section when it can be read, 30 otherwise
        try:
            row_limit = self.settings.getint("askomics", "npreview")
        except Exception:
            row_limit = 30
        self.preview_limit = row_limit

        # Filled later by set_preview_and_header / set_columns_type
        self.header = []
        self.preview = []
        self.columns_type = []
        # Values seen for each category column while generating the rdf content
        self.category_values = {}
56

57
    def set_preview(self):
        """Fill the header and the preview from the file, then guess the columns type"""
        # The type guessing works on the preview, so the preview must be read first
        self.set_preview_and_header()
        self.set_columns_type()
61

62
    def get_preview(self):
1✔
63
        """Get a preview of the file
64

65
        Returns
66
        -------
67
        dict
68
            File preview
69
        """
70
        return {
1✔
71
            'type': self.type,
72
            'id': self.id,
73
            'name': self.human_name,
74
            'error': self.error,
75
            'error_message': self.error_message,
76
            'data': {
77
                'header': self.header,
78
                'content_preview': self.preview,
79
                'columns_type': self.columns_type
80
            }
81
        }
82

83
    def force_columns_type(self, forced_columns_type):
1✔
84
        """Set the columns type without detecting them
85

86
        Parameters
87
        ----------
88
        forced_columns_type : list
89
            columns type
90
        """
91
        self.columns_type = forced_columns_type
1✔
92

93
    def force_header_names(self, forced_header_names):
1✔
94
        """Set the columns type without detecting them
95

96
        Parameters
97
        ----------
98
        forced_columns_type : list
99
            columns type
100
        """
101
        self.header = forced_header_names
×
102

103
    def set_preview_and_header(self):
1✔
104
        """Set the preview and header by looking in the fists lines of the file"""
105
        try:
1✔
106
            with open(self.path, 'r', encoding='utf-8') as csv_file:
1✔
107
                reader = csv.reader(csv_file, dialect=self.dialect)
1✔
108
                count = 0
1✔
109
                # Store header
110
                header = next(reader)
1✔
111
                self.header = [h.strip() for h in header]
1✔
112
                if not all(self.header):
1✔
113
                    raise Exception("Empty column in header")
1✔
114

115
                # Loop on lines
116
                preview = []
1✔
117
                for row in reader:
1✔
118
                    res_row = {}
1✔
119
                    res_row = dict.fromkeys(self.header, "")
1✔
120
                    for i, cell in enumerate(row):
1✔
121
                        res_row[self.header[i]] = cell
1✔
122
                    preview.append(res_row)
1✔
123

124
                    # Stop after x lines
125
                    if self.preview_limit:
1✔
126
                        count += 1
1✔
127
                        if count > self.preview_limit:
1✔
128
                            break
×
129
            self.preview = preview
1✔
130

131
        except Exception as e:
1✔
132
            self.error = True
1✔
133
            self.error_message = "Malformated CSV/TSV ({})".format(str(e))
1✔
134
            traceback.print_exc(file=sys.stdout)
1✔
135

136
    def set_columns_type(self):
1✔
137
        """Set the columns type by guessing them"""
138
        index = 0
1✔
139
        for col in self.transposed_preview:
1✔
140
            self.columns_type.append(self.guess_column_type(col, index))
1✔
141
            index += 1
1✔
142
        # check coltypes
143
        self.check_columns_types()
1✔
144

145
    def check_columns_types(self):
1✔
146
        """Check all columns type after detection and correct them"""
147
        # Change start and end into numeric if here is not only one start and one end
148
        if not (self.columns_type.count("start") == 1 and self.columns_type.count("end") == 1):
1✔
149
            self.columns_type = ["numeric" if ctype in ("start", "end") else ctype for ctype in self.columns_type]
1✔
150
        # Change ref into text if their is more than one
151
        if not self.columns_type.count("reference") == 1:
1✔
152
            self.columns_type = ["text" if ctype == "reference" else ctype for ctype in self.columns_type]
1✔
153
        # Change strand into text if their is more than one
154
        if not self.columns_type.count("strand") == 1:
1✔
155
            self.columns_type = ["text" if ctype == "strand" else ctype for ctype in self.columns_type]
1✔
156

157
    def is_category(self, values):
1✔
158
        """Check if a list af values are categories
159

160
        Parameters
161
        ----------
162
        values : list
163
            List of values
164

165
        Returns
166
        -------
167
        bool
168
            True if values are categories
169
        """
170
        return len(set(list(filter(None, values)))) <= int(len(list(filter(None, values))) / 3)
×
171

172
    def guess_column_type(self, values, header_index):
1✔
173
        """Guess the columns type
174

175
        Parameters
176
        ----------
177
        values : list
178
            columns preview
179
        header_index : int
180
            Header index
181

182
        Returns
183
        -------
184
        string
185
            The guessed type
186
        """
187
        # First col is entity start
188
        if header_index == 0:
1✔
189
            return "start_entity"
1✔
190

191
        # if name contain @, this is a relation
192
        if self.header[header_index].find("@") > 0:
1✔
193
            return "general_relation"
×
194

195
        # If it matches "label"
196
        if header_index == 1 and re.match(r".*label.*", self.header[header_index].lower(), re.IGNORECASE) is not None:
1✔
197
            return "label"
1✔
198

199
        special_types = {
1✔
200
            'reference': ('chr', 'ref', 'scaff'),
201
            'strand': ('strand', ),
202
            'start': ('start', 'begin'),
203
            'end': ('end', 'stop'),
204
            'date': ('date', 'time', 'birthday', 'day')
205
        }
206

207
        # First, detect boolean values
208
        if self.are_boolean(values):
1✔
209
            return "boolean"
×
210

211
        # Then, detect special type with header
212
        for stype, expressions in special_types.items():
1✔
213
            # Need to check once if it matches any subtype
214
            expression_regexp = "|".join([".*{}.*".format(expression.lower()) for expression in expressions])
1✔
215
            if re.match(expression_regexp, self.header[header_index].lower(), re.IGNORECASE) is not None:
1✔
216
                # Test if start and end are numerical
217
                if stype in ('start', 'end') and not all(self.is_decimal(val) for val in values):
1✔
218
                    break
×
219
                # test if strand is a category with 3 elements max
220
                if stype == 'strand' and len(set(list(filter(None, values)))) > 3:
1✔
221
                    break
×
222
                # Test if date respects a date format
223
                if stype == 'date' and not all(self.is_date(val) for val in values):
1✔
224
                    break
×
225
                return stype
1✔
226

227
        # Then, check goterm
228
        # if all((val.startswith("GO:") and val[3:].isdigit()) for val in values):
229
        #     return "goterm"
230

231
        # If header contain ID, it is text
232
        if re.match(r".*ID.*", self.header[header_index]) is not None:
1✔
233
            return "text"
×
234

235
        # Finaly, check numerical/text
236
        if all(self.is_decimal(val) for val in values):
1✔
237
            if all(val == "" for val in values):
1✔
238
                return "text"
1✔
239
            return "numeric"
×
240

241
        return "text"  # default
1✔
242

243
    @staticmethod
1✔
244
    def are_boolean(values):
1✔
245
        """Check if a list of values are boolean strings
246

247
        Parameters
248
        ----------
249
        values : list
250
            List of strings
251

252
        Returns
253
        -------
254
        boolean
255
            True if values are boolean strings (true false or 0 1)
256
        """
257
        return set(list(filter(None, [value.lower() for value in values]))) in ({'false', 'true'}, {'0', '1'})
1✔
258

259
    @staticmethod
1✔
260
    def is_decimal(value):
1✔
261
        """Guess if a variable if a number
262

263
        Parameters
264
        ----------
265
        value :
266
            The var to test
267

268
        Returns
269
        -------
270
        boolean
271
            True if it's decimal
272
        """
273
        if value == "":
1✔
274
            return True
×
275
        if value.isdigit():
1✔
276
            return True
1✔
277
        else:
278
            try:
1✔
279
                float(value)
1✔
280
                return True
×
281
            except ValueError:
1✔
282
                return False
1✔
283

284
    @staticmethod
1✔
285
    def is_date(value):
1✔
286
        """Guess if a variable is a date
287

288
        Parameters
289
        ----------
290
        value :
291
            The var to test
292

293
        Returns
294
        -------
295
        boolean
296
            True if it's a date
297
        """
298
        if value == "":
1✔
299
            return True
×
300
        try:
1✔
301
            parser.parse(value, dayfirst=True).date()
1✔
302
            return True
1✔
303
        except Exception:
×
304
            return False
×
305

306
    @property
1✔
307
    def transposed_preview(self):
1✔
308
        """Transpose the preview
309

310
        Returns
311
        -------
312
        list
313
            Transposed preview
314
        """
315
        data = [[] for x in range(len(self.header))]
1✔
316
        for row in self.preview:
1✔
317
            for key, value in row.items():
1✔
318
                data[self.header.index(key)].append(value)
1✔
319
        return data
1✔
320

321
    @cached_property
    def dialect(self):
        """Csv dialect, sniffed from the first line of the file

        Returns
        -------
        TYPE
            dialect
        """
        # Undecodable bytes are ignored: only the delimiter matters here
        with open(self.path, 'r', encoding="utf-8", errors="ignore") as tabfile:
            first_line = tabfile.readline()

        # Restrict to a list of allowed delimiters to avoid strange results
        return csv.Sniffer().sniff(first_line, delimiters=';,\t ')
336

337
    def integrate(self, dataset_id, forced_columns_type=None, forced_header_names=None, public=False):
        """Integrate the file

        Parameters
        ----------
        dataset_id :
            Dataset id, forwarded to File.integrate
        forced_columns_type : list, optional
            Columns type to use; when empty or None, the types are guessed
        forced_header_names : list, optional
            Header names replacing the ones read in the file
        public : bool, optional
            True if dataset will be public
        """
        self.public = public

        # The header is always read from the file first; forced names, if any,
        # replace it only after the columns type are known
        self.set_preview_and_header()
        if not forced_columns_type:
            self.set_columns_type()
        else:
            self.force_columns_type(forced_columns_type)

        if forced_header_names:
            self.force_header_names(forced_header_names)

        File.integrate(self, dataset_id=dataset_id)
356

357
    def set_rdf_abstraction_domain_knowledge(self):
        """Set the abstraction, then the domain knowledge"""
        self.set_rdf_abstraction()
        self.set_rdf_domain_knowledge()
361

362
    def set_rdf_domain_knowledge(self):
        """Set the domain knowledge

        For every category-like column (category, reference, strand) with
        values recorded in ``self.category_values``, link the category to each
        of its values and give each value a type and a label.
        """
        category_like = ('category', 'reference', 'strand')

        for index, attribute in enumerate(self.header):
            if self.columns_type[index] not in category_like:
                continue
            if self.header[index] not in self.category_values:
                continue

            category = self.namespace_data["{}Category".format(self.format_uri(attribute, remove_space=True))]
            predicate = self.namespace_internal["category"]

            for value in self.category_values[self.header[index]]:
                value_node = self.rdfize(value)
                if self.columns_type[index] == "strand":
                    # Strand values are replaced by what get_faldo_strand returns for them
                    value_node = self.get_faldo_strand(value)

                value_type = self.namespace_data["{}CategoryValue".format(self.format_uri(self.header[index]))]
                self.graph_abstraction_dk.add((category, predicate, value_node))
                self.graph_abstraction_dk.add((value_node, rdflib.RDF.type, value_type))
                self.graph_abstraction_dk.add((value_node, rdflib.RDFS.label, rdflib.Literal(value)))
375

376
    def set_rdf_abstraction(self):
        """Set the abstraction

        Describe in ``self.graph_abstraction_dk`` the entity held by the first
        column, then one attribute or relation per remaining column according
        to ``self.columns_type``. The second column is skipped when it is the
        label. When ``self.faldo_entity`` is set, the blank nodes of the
        attributes listed in ``self.faldo_abstraction`` also get their faldo type.
        """
        # Entity
        # Check subclass syntax (<): the part before "<" is the entity,
        # the part after is its mother class
        if self.header[0].find('<') > 0:
            splitted = self.header[0].split('<')
            entity = self.rdfize(splitted[0])
            entity_label = rdflib.Literal(splitted[0])
            mother_class = self.rdfize(splitted[1])
            # subClassOf
            self.graph_abstraction_dk.add((entity, rdflib.RDFS.subClassOf, mother_class))
        else:
            entity = self.rdfize(self.header[0])
            entity_label = rdflib.Literal(self.header[0])

        self.graph_abstraction_dk.add((entity, rdflib.RDF.type, rdflib.OWL.Class))
        self.graph_abstraction_dk.add((entity, rdflib.RDF.type, self.namespace_internal['entity']))
        if self.faldo_entity:
            self.graph_abstraction_dk.add((entity, rdflib.RDF.type, self.namespace_internal["faldo"]))
        self.graph_abstraction_dk.add((entity, rdflib.RDFS.label, entity_label))
        if self.columns_type[0] == 'start_entity':
            self.graph_abstraction_dk.add((entity, rdflib.RDF.type, self.namespace_internal['startPoint']))

        # Ontology short name -> ontology uri; a column typed with a short name points to that ontology
        available_ontologies = {
            ontology['short_name']: ontology['uri']
            for ontology in OntologyManager(self.app, self.session).list_ontologies()
        }

        # Blank node of each plain attribute, keyed by attribute uri (read by the faldo section)
        attribute_blanks = {}

        # XSD range of the datatype columns; any type not listed here is stored as text
        datatype_ranges = {
            'numeric': rdflib.XSD.decimal,
            'start': rdflib.XSD.decimal,
            'end': rdflib.XSD.decimal,
            'boolean': rdflib.XSD.boolean,
            'date': rdflib.XSD.date
        }

        # Attributes and relations
        for index, attribute_name in enumerate(self.header):

            # Skip entity
            if index == 0:
                continue

            column_type = self.columns_type[index]

            # Skip label for second column
            if column_type == "label" and index == 1:
                continue

            blank = BNode()

            # Relation: the header is "relation_name@target_entity"
            if column_type in ('general_relation', 'symetric_relation', 'indirect_relation'):
                splitted = attribute_name.split('@')
                rdf_range = self.rdfize(splitted[1])
                self._add_relation_abstraction(blank, entity, self.rdfize(splitted[0]), rdflib.Literal(splitted[0]), rdf_range)

                if column_type == 'symetric_relation':
                    # A symetric relation is also declared in the other direction
                    self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, rdf_range))
                    self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, entity))
                if column_type == 'indirect_relation':
                    self.graph_abstraction_dk.add((blank, self.namespace_internal["isIndirectRelation"], rdflib.Literal("true", datatype=rdflib.XSD.boolean)))

                continue

            # Manage ontologies: stored like a relation whose range is the ontology
            if column_type in available_ontologies:
                rdf_range = self.rdfize(available_ontologies[column_type])
                self._add_relation_abstraction(blank, entity, self.rdfize(attribute_name), rdflib.Literal(attribute_name), rdf_range)
                continue

            attribute = self.rdfize(attribute_name)
            label = rdflib.Literal(attribute_name)

            # Category
            if column_type in ('category', 'reference', 'strand'):
                rdf_range = self.namespace_data["{}Category".format(self.format_uri(attribute_name, remove_space=True))]
                rdf_type = rdflib.OWL.ObjectProperty
                self.graph_abstraction_dk.add((blank, rdflib.RDF.type, self.namespace_internal["AskomicsCategory"]))

            # Numeric, boolean, date, or text (default)
            else:
                rdf_range = datatype_ranges.get(column_type, rdflib.XSD.string)
                rdf_type = rdflib.OWL.DatatypeProperty

            attribute_blanks[attribute] = blank

            # New way of storing attributes (starting from 4.4.0)
            self.graph_abstraction_dk.add((blank, rdflib.RDF.type, rdf_type))
            self.graph_abstraction_dk.add((blank, self.namespace_internal["uri"], attribute))
            self.graph_abstraction_dk.add((blank, rdflib.RDFS.label, label))
            self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, entity))
            self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, rdf_range))

        # Faldo:
        if self.faldo_entity:
            for key, value in self.faldo_abstraction.items():
                if value:
                    blank = attribute_blanks[value]
                    self.graph_abstraction_dk.add((blank, rdflib.RDF.type, self.faldo_abstraction_eq[key]))
                    self.graph_abstraction_dk.add((blank, self.namespace_internal["uri"], value))

    def _add_relation_abstraction(self, blank, entity, attribute, label, rdf_range):
        """Add to the abstraction the triples describing one relation (storage used since 4.4.0)

        Parameters
        ----------
        blank : BNode
            Blank node carrying the relation description
        entity :
            Domain of the relation
        attribute :
            Uri of the relation
        label : rdflib.Literal
            Label of the relation
        rdf_range :
            Range of the relation
        """
        # The external endpoint, when set, takes precedence over the configured triplestore endpoint
        endpoint = rdflib.Literal(self.external_endpoint) if self.external_endpoint else rdflib.Literal(self.settings.get('triplestore', 'endpoint'))
        self.graph_abstraction_dk.add((blank, rdflib.RDF.type, rdflib.OWL.ObjectProperty))
        self.graph_abstraction_dk.add((blank, rdflib.RDF.type, self.namespace_internal["AskomicsRelation"]))
        self.graph_abstraction_dk.add((blank, self.namespace_internal["uri"], attribute))
        self.graph_abstraction_dk.add((blank, rdflib.RDFS.label, label))
        self.graph_abstraction_dk.add((blank, rdflib.RDFS.domain, entity))
        self.graph_abstraction_dk.add((blank, rdflib.RDFS.range, rdf_range))
        self.graph_abstraction_dk.add((blank, rdflib.DCAT.endpointURL, endpoint))
        self.graph_abstraction_dk.add((blank, rdflib.DCAT.dataset, rdflib.Literal(self.name)))
522

523
    def generate_rdf_content(self):
        """Generator of the rdf content

        Read the file row by row and add the triples of each row to
        ``self.graph_chunk``: entity type and label, one triple per attribute
        or relation cell, and the faldo triples when the row has a start and
        an end. ``self.graph_chunk.percent`` is updated for every row,
        ``self.category_values`` and ``self.faldo_abstraction`` are filled
        along the way.

        Yields
        ------
        None
            Control is handed back after each non-blank row; the content
            itself is in ``self.graph_chunk``
        """
        # Count the lines (header included) to report progress; the handle is
        # closed right away and uses the same encoding as the actual read
        with open(self.path, 'r', encoding='utf-8') as counted_file:
            total_lines = sum(1 for _ in counted_file)

        available_ontologies = {}
        for ontology in OntologyManager(self.app, self.session).list_ontologies():
            available_ontologies[ontology['short_name']] = ontology['uri']

        with open(self.path, 'r', encoding='utf-8') as file:
            reader = csv.reader(file, dialect=self.dialect)

            # Skip header
            next(reader)

            # Entity
            # Check subclass syntax (<): only the part before "<" is the entity type
            if self.header[0].find('<') > 0:
                splitted = self.header[0].split('<')
                entity_type = self.rdfize(splitted[0])
            else:
                entity_type = self.rdfize(self.header[0])

            # Faldo: needs both a start and an end column
            self.faldo_entity = 'start' in self.columns_type and 'end' in self.columns_type

            # The label column only counts when it is the second column
            has_label = "label" in self.columns_type and self.columns_type.index("label") == 1

            # Loop on lines
            for row_number, row in enumerate(reader):

                # Percent
                self.graph_chunk.percent = row_number * 100 / total_lines

                # skip blank lines
                if not row:
                    continue

                # Entity
                entity = self.rdfize(row[0], custom_namespace=self.namespace_entity)
                # A row may stop after its first cell: fall back on the uri label then
                if has_label and len(row) > 1 and row[1]:
                    label = row[1]
                else:
                    label = self.get_uri_label(row[0])
                self.graph_chunk.add((entity, rdflib.RDF.type, entity_type))
                self.graph_chunk.add((entity, rdflib.RDFS.label, rdflib.Literal(label)))

                # Faldo
                faldo_reference = None
                faldo_strand = None
                faldo_start = None
                faldo_end = None

                # Position
                start = None
                end = None
                reference = None

                # For attributes, loop on cell
                for column_number, cell in enumerate(row):
                    current_type = self.columns_type[column_number]
                    current_header = self.header[column_number]

                    attribute = None
                    relation = None
                    symetric_relation = False

                    # Skip label type for second column
                    # if type is label but not second column, default to string
                    if current_type == "label" and column_number == 1:
                        continue

                    # We ignore all data for indirect relations
                    if current_type == "indirect_relation":
                        continue

                    # Skip entity and blank cells (a blank strand is still processed)
                    if column_number == 0 or (not cell and not current_type == "strand"):
                        continue

                    # Relation
                    if current_type in ('general_relation', 'symetric_relation'):
                        symetric_relation = current_type == 'symetric_relation'
                        splitted = current_header.split('@')
                        relation = self.rdfize(splitted[0])
                        attribute = self.rdfize(cell)

                    # Ontology
                    elif current_type in available_ontologies:
                        symetric_relation = False
                        relation = self.rdfize(current_header)
                        attribute = self.rdfize(cell)

                    # Category
                    elif current_type in ('category', 'reference', 'strand'):
                        potential_relation = self.rdfize(current_header)
                        if current_type == "strand":
                            # Override csv value, use "proper" values
                            cell = self.get_faldo_strand_label(cell)
                        if current_header not in self.category_values.keys():
                            # Add the category in dict, and the first value in a set
                            self.category_values[current_header] = {cell, }
                        else:
                            # add the cell in the set
                            self.category_values[current_header].add(cell)
                        if current_type == 'reference':
                            faldo_reference = self.rdfize(cell)
                            reference = cell
                            self.faldo_abstraction["reference"] = potential_relation
                        elif current_type == 'strand':
                            faldo_strand = self.get_faldo_strand(cell)
                            self.faldo_abstraction["strand"] = potential_relation
                        else:
                            relation = potential_relation
                            attribute = self.rdfize(cell)

                    # Numeric
                    elif current_type in ('numeric', 'start', 'end'):
                        potential_relation = self.rdfize(current_header)
                        if current_type == "start":
                            faldo_start = rdflib.Literal(self.convert_type(cell))
                            start = cell
                            self.faldo_abstraction["start"] = potential_relation
                        elif current_type == "end":
                            faldo_end = rdflib.Literal(self.convert_type(cell))
                            end = cell
                            self.faldo_abstraction["end"] = potential_relation
                        else:
                            relation = potential_relation
                            attribute = rdflib.Literal(self.convert_type(cell))

                    # Boolean
                    elif current_type == "boolean":
                        relation = self.rdfize(current_header)
                        if cell.lower() in ("1", "true"):
                            attribute = rdflib.Literal("true", datatype=rdflib.XSD.boolean)
                        else:
                            attribute = rdflib.Literal("false", datatype=rdflib.XSD.boolean)

                    elif current_type == "date":
                        relation = self.rdfize(current_header)
                        attribute = rdflib.Literal(self.convert_type(cell, try_date=True))

                    # default is text
                    else:
                        relation = self.rdfize(current_header)
                        attribute = rdflib.Literal(self.convert_type(cell))

                    # reference, strand, start and end leave relation/attribute to None:
                    # they are only stored through the faldo triples below
                    if entity and relation is not None and attribute is not None:
                        self.graph_chunk.add((entity, relation, attribute))
                        if symetric_relation:
                            self.graph_chunk.add((attribute, relation, entity))

                if self.faldo_entity and faldo_start and faldo_end:

                    # Triples respecting faldo ontology

                    location = BNode()
                    begin_node = BNode()
                    end_node = BNode()

                    self.graph_chunk.add((entity, self.faldo.location, location))

                    self.graph_chunk.add((location, rdflib.RDF.type, self.faldo.region))
                    self.graph_chunk.add((location, self.faldo.begin, begin_node))
                    self.graph_chunk.add((location, self.faldo.end, end_node))

                    self.graph_chunk.add((begin_node, rdflib.RDF.type, self.faldo.ExactPosition))
                    self.graph_chunk.add((begin_node, self.faldo.position, faldo_start))

                    self.graph_chunk.add((end_node, rdflib.RDF.type, self.faldo.ExactPosition))
                    self.graph_chunk.add((end_node, self.faldo.position, faldo_end))

                    if faldo_reference:
                        self.graph_chunk.add((begin_node, self.faldo.reference, faldo_reference))
                        self.graph_chunk.add((end_node, self.faldo.reference, faldo_reference))

                    if faldo_strand:
                        self.graph_chunk.add((begin_node, rdflib.RDF.type, faldo_strand))
                        self.graph_chunk.add((end_node, rdflib.RDF.type, faldo_strand))

                    # Shortcut triple for faldo queries
                    self.graph_chunk.add((entity, self.namespace_internal["faldoBegin"], faldo_start))
                    self.graph_chunk.add((entity, self.namespace_internal["faldoEnd"], faldo_end))
                    if faldo_reference:
                        self.graph_chunk.add((entity, self.namespace_internal["faldoReference"], faldo_reference))
                        if faldo_strand:
                            strand_ref = self.get_reference_strand_uri(reference, faldo_strand, None)
                            for sref in strand_ref:
                                self.graph_chunk.add((entity, self.namespace_internal["referenceStrand"], sref))

                    if faldo_strand:
                        self.graph_chunk.add((entity, self.namespace_internal["faldoStrand"], faldo_strand))

                    # blocks: the entity is attached to every block of
                    # block_size positions that its [start, end] range touches
                    block_base = self.settings.getint("triplestore", "block_size")
                    block_start = int(start) // block_base
                    block_end = int(end) // block_base

                    for slice_block in range(block_start, block_end + 1):
                        self.graph_chunk.add((entity, self.namespace_internal['includeIn'], rdflib.Literal(int(slice_block))))
                        if reference:
                            block_reference = self.rdfize(self.format_uri("{}_{}".format(reference, slice_block)))
                            self.graph_chunk.add((entity, self.namespace_internal["includeInReference"], block_reference))
                            if faldo_strand:
                                strand_ref = self.get_reference_strand_uri(reference, faldo_strand, slice_block)
                                for sref in strand_ref:
                                    self.graph_chunk.add((entity, self.namespace_internal["includeInReferenceStrand"], sref))
                        if faldo_strand:
                            strand_ref = self.get_reference_strand_uri(None, faldo_strand, slice_block)
                            for sref in strand_ref:
                                self.graph_chunk.add((entity, self.namespace_internal["includeInStrand"], sref))

                yield
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc