• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

askomics / flaskomics / 6590008757

20 Oct 2023 03:58PM UTC coverage: 83.758% (+0.4%) from 83.31%
6590008757

push

github-actions

web-flow
Merge pull request #420 from askomics/dev

Release 4.5.0

633 of 633 new or added lines in 29 files covered. (100.0%)

6240 of 7450 relevant lines covered (83.76%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.5
/askomics/libaskomics/FilesHandler.py
1
import os
1✔
2
import time
1✔
3
import requests
1✔
4

5
from askomics.libaskomics.BedFile import BedFile
1✔
6
from askomics.libaskomics.CsvFile import CsvFile
1✔
7
from askomics.libaskomics.FilesUtils import FilesUtils
1✔
8
from askomics.libaskomics.GffFile import GffFile
1✔
9
from askomics.libaskomics.RdfFile import RdfFile
1✔
10
from askomics.libaskomics.Database import Database
1✔
11
from askomics.libaskomics.Utils import Utils
1✔
12

13

14
class FilesHandler(FilesUtils):
    """Handle the files of the current user (upload, listing, deletion)

    Attributes
    ----------
    files : list
        list of File, filled by handle_files
    host_url : string
        AskOmics url, for the triplestore
    upload_path : string
        Upload path, built as <data_directory>/<user id>_<username>/upload
    date : int
        Timestamp set by store_file_info_in_db, None before the first call
    external_endpoint : None, optional
        Forwarded as is to the File objects built by handle_files
    custom_uri : None, optional
        Forwarded as is to the File objects built by handle_files
    external_graph : None, optional
        Forwarded as is to the File objects built by handle_files
    """
26

27
    def __init__(self, app, session, host_url=None, external_endpoint=None, custom_uri=None, external_graph=None):
        """Build the handler and compute the upload directory of the user

        Parameters
        ----------
        app : Flask
            flask app
        session :
            AskOmics session, contain the user
        host_url : None, optional
            AskOmics url, for the triplestore
        external_endpoint : None, optional
            Stored, then forwarded to the File objects built by handle_files
        custom_uri : None, optional
            Stored, then forwarded to the File objects built by handle_files
        external_graph : None, optional
            Stored, then forwarded to the File objects built by handle_files
        """
        FilesUtils.__init__(self, app, session)

        # State filled later on (handle_files / store_file_info_in_db)
        self.files = []
        self.date = None

        # Options forwarded to every File object
        self.host_url = host_url
        self.external_endpoint = external_endpoint
        self.custom_uri = custom_uri
        self.external_graph = external_graph

        # <data_directory>/<user id>_<username>/upload
        data_directory = self.settings.get("askomics", "data_directory")
        user = self.session['user']
        user_directory = "{}_{}".format(user['id'], user['username'])
        self.upload_path = "{}/{}/upload".format(data_directory, user_directory)
1✔
51

52
    def handle_files(self, files_id):
        """Build a File object for each requested file and append it to self.files

        Files whose type is not one of the supported ones are ignored.

        Parameters
        ----------
        files_id : list
            id of files to handle
        """
        # Keyword arguments shared by every File class
        options = {
            'host_url': self.host_url,
            'external_endpoint': self.external_endpoint,
            'custom_uri': self.custom_uri,
            'external_graph': self.external_graph
        }

        for info in self.get_files_infos(files_id=files_id, return_path=True):
            file_type = info['type']

            # Pick the class matching the type stored in database
            if file_type == 'csv/tsv':
                file_class = CsvFile
            elif file_type == 'gff/gff3':
                file_class = GffFile
            elif file_type in ('rdf/ttl', 'rdf/xml', 'rdf/nt'):
                file_class = RdfFile
            elif file_type == 'bed':
                file_class = BedFile
            else:
                continue

            self.files.append(file_class(self.app, self.session, info, **options))
1✔
71

72
    def get_files_infos(self, files_id=None, files_path=None, return_path=False):
        """List the files of the current user, optionally filtered by id or by path

        Without any filter, all the files of the user are returned.

        Parameters
        ----------
        files_id : None, optional
            list of files id (takes precedence over files_path)
        files_path : None, optional
            list of files path, used only when files_id is empty
        return_path : bool, optional
            return the path if True

        Returns
        -------
        list
            list of files info
        """
        database = Database(self.app, self.session)

        # Column to filter on, with the wanted values
        if files_id:
            column, wanted = 'id', files_id
        elif files_path:
            column, wanted = 'path', files_path
        else:
            column, wanted = None, ()

        query = '''
            SELECT id, name, type, size, path, date, status
            FROM files
            WHERE user_id = ?
            '''
        if column is not None:
            # One placeholder per wanted value
            conditions = ' OR '.join(['{} = ?'.format(column)] * len(wanted))
            query += '''AND ({})
            '''.format(conditions)

        rows = database.execute_sql_query(query, (self.session['user']['id'], ) + tuple(wanted))

        files = []
        for file_id, name, file_type, size, path, date, status in rows:
            info = {
                'id': file_id,
                'name': name,
                'type': file_type,
                'size': size,
                'date': date,
                'status': status
            }
            # The path is only exposed on demand
            if return_path:
                info['path'] = path
            files.append(info)

        return files
1✔
138

139
    def get_all_files_infos(self):
        """List the files of all the users (admin only)

        Returns
        -------
        list
            list of files info with the username of the owner,
            empty list if the current user is not admin
        """
        if not self.session['user']['admin']:
            return []

        query = '''
        SELECT files.id, files.name, files.type, files.size, files.date, files.status, users.username
        FROM files
        INNER JOIN users ON files.user_id=users.user_id
        '''

        rows = Database(self.app, self.session).execute_sql_query(query, ())

        # Same order as the selected columns
        keys = ('id', 'name', 'type', 'size', 'date', 'status', 'user')
        return [dict(zip(keys, row)) for row in rows]
1✔
168

169
    def get_file_name(self):
        """Get a random file name, not used yet in the upload directory

        Returns
        -------
        string
            file name
        """
        while True:
            candidate = Utils.get_random_string(10)
            # Draw again as long as a file with this name already exists
            if not os.path.isfile("{}/{}".format(self.upload_path, candidate)):
                return candidate
1✔
185

186
    def write_data_into_file(self, data, file_name, mode, should_exist=False):
1✔
187
        """Write data into a file
188

189
        Parameters
190
        ----------
191
        data : string
192
            data to write
193
        file_name : string
194
            Local file name
195
        mode : string
196
            open mode (w or a)
197
        """
198
        file_path = "{}/{}".format(self.upload_path, file_name)
1✔
199
        if mode == "a":
1✔
200
            if not os.path.isfile(file_path):
1✔
201
                raise Exception("No file exists at this path")
×
202
            # Check this path does not already exists in database (meaning, already uploaded)
203
            if len(self.get_files_infos(files_path=[file_path])) > 0:
1✔
204
                raise Exception("A file with this path already exists in database")
×
205

206
        with open(file_path, mode) as file:
1✔
207
            file.write(data)
1✔
208

209
    def store_file_info_in_db(self, name, filetype, file_name, size, status="available", task_id=None):
        """Insert the file info in the database

        Also sets self.date to the current timestamp.

        Parameters
        ----------
        name : string
            Name of the file
        filetype : string
            Declared type (csv ...), guessed from the extension of name if unknown
        file_name : string
            Local file name
        size : string
            Size of file
        status: string
            Status of the file (downloading, available, unavailable)
        task_id : None, optional
            Task id stored with the file

        Returns
        -------
        str
            file id
        """
        # Declared types that map directly to an AskOmics type
        known_types = {
            'text/tab-separated-values': 'csv/tsv',
            'tabular': 'csv/tsv',
            'text/turtle': 'rdf/ttl',
            'ttl': 'rdf/ttl',
            'text/xml': 'rdf/xml',
            'application/rdf+xml': 'rdf/xml',
            'application/n-triples': 'rdf/nt',
            'gff': 'gff/gff3'
        }
        if filetype in known_types:
            filetype = known_types[filetype]
        else:
            # Anything else: rely on the extension of the original name
            filetype = self.get_type(os.path.splitext(name)[1])

        query = '''
        INSERT INTO files VALUES(
            NULL,
            ?,
            ?,
            ?,
            ?,
            ?,
            ?,
            ?,
            ?
        )
        '''

        self.date = int(time.time())
        values = (
            self.session['user']['id'],
            name,
            filetype,
            "{}/{}".format(self.upload_path, file_name),
            size,
            self.date,
            status,
            task_id
        )

        return Database(self.app, self.session).execute_sql_query(query, values, get_id=True)
1✔
263

264
    def update_file_info(self, file_id, size=None, status="", task_id=""):
        """Update the size, the status and/or the task id of a file

        Only the given values are updated. Nothing is done if none is given.

        Parameters
        ----------
        file_id : str
            File id
        size : str
            File current size (None to leave it unchanged)
        status : str
            File status (empty to leave it unchanged)
        task_id : str
            Current task id (empty to leave it unchanged)
        """
        # One "column=?" assignment and one value per given field
        assignments = []
        query_vars = []

        if size is not None:
            assignments.append("size=?")
            query_vars.append(size)

        if status:
            assignments.append("status=?")
            query_vars.append(status)

        if task_id:
            assignments.append("task_id=?")
            query_vars.append(task_id)

        if not assignments:
            return

        query_vars.append(file_id)

        # Joining the assignments puts the commas only where they are needed
        query = '''
        UPDATE files SET
        {}
        WHERE id=?
        '''.format(", ".join(assignments))

        database = Database(self.app, self.session)
        database.execute_sql_query(query, tuple(query_vars))
1✔
313

314
    def persist_chunk(self, chunk_info):
1✔
315
        """Persist a file by chunk. Store info in db if the chunk is the last
316

317
        Parameters
318
        ----------
319
        chunk_info : dict
320
            Info about the chunk
321

322
        Returns
323
        -------
324
        str
325
            local filename
326
        """
327
        try:
1✔
328
            # 1 chunk file
329
            if chunk_info["first"] and chunk_info["last"]:
1✔
330
                # Write data into file
331
                file_name = self.get_file_name()
1✔
332
                self.write_data_into_file(chunk_info["chunk"], file_name, "w")
1✔
333
                # store file info in db
334
                self.store_file_info_in_db(chunk_info["name"], chunk_info["type"], file_name, chunk_info["size"])
1✔
335
            # first chunk of large file
336
            elif chunk_info["first"]:
1✔
337
                file_name = self.get_file_name()
1✔
338
                self.write_data_into_file(chunk_info["chunk"], file_name, "w")
1✔
339
            # last chunk of large file
340
            elif chunk_info["last"]:
1✔
341
                file_name = chunk_info["path"]
1✔
342
                self.write_data_into_file(chunk_info["chunk"], file_name, "a")
1✔
343
                self.store_file_info_in_db(chunk_info["name"], chunk_info["type"], file_name, chunk_info["size"])
1✔
344
            # chunk of large file
345
            else:
346
                file_name = chunk_info["path"]
1✔
347
                self.write_data_into_file(chunk_info["chunk"], file_name, "a")
1✔
348

349
            return file_name
1✔
350
        except Exception as e:
×
351
            # Rollback
352
            try:
×
353
                file_path = "{}/{}".format(self.upload_path, file_name)
×
354
                # Delete if it does not exists in DB
355
                if len(self.get_files_infos(files_path=[file_path])) == 0:
×
356
                    self.delete_file_from_fs(file_path)
×
357
            except Exception:
×
358
                pass
×
359
            raise e
×
360

361
    def download_url(self, url, task_id):
        """Download a file from an URL and insert info in database

        The file is stored in database with the "downloading" status before
        the download starts, then updated to "available" on success or to
        "error" on any failure. Errors are not raised to the caller.

        Parameters
        ----------
        url : string
            The file url
        task_id : string
            Current task id, stored with the file info
        """
        # Get name and path, then store the file as "downloading"
        name = url.split("/")[-1]
        file_name = self.get_file_name()
        path = "{}/{}".format(self.upload_path, file_name)
        file_id = self.store_file_info_in_db(name, "", file_name, 0, "downloading", task_id)

        # Get file
        try:
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                count = 0
                with open(path, 'wb') as file:
                    for chunk in r.iter_content(chunk_size=1024 * 1024 * 10):
                        # Update size every ~1GO
                        # + Check quota
                        if count == 100:
                            if self.session['user']['quota'] > 0:
                                total_size = self.get_size_occupied_by_user() + os.path.getsize(path)
                                if total_size >= self.session['user']['quota']:
                                    raise Exception("Exceeded quota")
                            self.update_file_info(file_id, size=os.path.getsize(path))
                            count = 0

                        file.write(chunk)
                        count += 1

            # Update final value
            self.update_file_info(file_id, size=os.path.getsize(path), status="available")

        except Exception:
            # The failure may have happened before the local file was created
            # (unreachable url, HTTP error): getsize would raise here and the
            # file would stay "downloading" forever
            size = os.path.getsize(path) if os.path.isfile(path) else 0
            self.update_file_info(file_id, size=size, status="error")
×
400

401
    def get_type(self, file_ext):
        """Get files type, based on extension

        The extension is matched whatever its case (.GFF3 is handled as .gff3).

        TODO: sniff file to get type

        Parameters
        ----------
        file_ext : string
            file extension, with the leading dot

        Returns
        -------
        string
            file type, csv/tsv if the extension is unknown
        """
        types_by_extension = {
            '.csv': 'csv/tsv',
            '.tsv': 'csv/tsv',
            '.tabular': 'csv/tsv',
            '.gff': 'gff/gff3',
            '.gff2': 'gff/gff3',
            '.gff3': 'gff/gff3',
            '.bed': 'bed',
            '.ttl': 'rdf/ttl',
            '.turtle': 'rdf/ttl',
            '.xml': 'rdf/xml',
            '.nt': 'rdf/nt'
        }
        # Anything that is not a string falls back to the default type
        extension = file_ext.lower() if isinstance(file_ext, str) else None
        # Default is csv
        return types_by_extension.get(extension, 'csv/tsv')
×
430

431
    def delete_files(self, files_id, admin=False):
        """Delete files from database and filesystem

        Without admin rights, only the files of the current user are removed,
        from the filesystem as well as from the database.

        Parameters
        ----------
        files_id : list
            list of file id
        admin : bool, optional
            Delete as admin (any file), effective only if the current user is admin

        Returns
        -------
        list
            list of files info (of all the users in admin mode, of the current user else)
        """
        is_admin = admin and self.session['user']['admin']

        for fid in files_id:
            file_path = self.get_file_path(fid)
            # get_file_path does not filter on the owner: check the ownership
            # before touching the filesystem (get_files_infos only returns the
            # files of the current user)
            allowed = is_admin or bool(self.get_files_infos(files_id=[fid]))
            if allowed and os.path.isfile(file_path):
                self.delete_file_from_fs(file_path)
            self.delete_file_from_db(fid, admin=admin)

        if is_admin:
            return self.get_all_files_infos()
        else:
            return self.get_files_infos()
1✔
454

455
    def delete_file_from_db(self, file_id, admin=False):
        """Remove a file from the database

        Parameters
        ----------
        file_id : int
            the file id to remove
        admin : bool, optional
            Remove the file whatever its owner, effective only if the current user is admin
        """
        database = Database(self.app, self.session)

        query_params = [file_id]
        where_query = ""

        # Without admin rights, restrict the deletion to the files of the user
        if not (admin and self.session['user']['admin']):
            where_query = "AND user_id=?"
            query_params.append(self.session['user']['id'])

        query = '''
        DELETE FROM files
        WHERE id=?
        {}
        '''.format(where_query)

        database.execute_sql_query(query, tuple(query_params))
1✔
481

482
    def delete_file_from_fs(self, file_path):
        """Remove a file from the filesystem

        Parameters
        ----------
        file_path : string
            Path to the file
        """
        # os.unlink is the same operation as os.remove
        os.unlink(file_path)
1✔
491

492
    def get_file_path(self, file_id):
        """Get the path of a file from its id

        The query does not filter on the owner of the file.

        Parameters
        ----------
        file_id : int
            the file id

        Returns
        -------
        string
            file path
        """
        query = '''
        SELECT path
        FROM files
        WHERE id=?
        '''

        rows = Database(self.app, self.session).execute_sql_query(query, (file_id, ))

        # First column of the first row
        first_row = rows[0]
        return first_row[0]
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc