• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HEPData / hepdata / 16028315195

02 Jul 2025 02:44PM UTC coverage: 83.647%. Remained the same
16028315195

Pull #888

github

GraemeWatt
tests: switch from lxml to xmlschema

* Modify test_xml_validates to avoid problems with lxml v6.0.0.
* Closes #887.
Pull Request #888: Fix `test_xml_validates` by switching from `lxml` to `xmlschema`

0 of 1 new or added line in 1 file covered. (0.0%)

8 existing lines in 1 file now uncovered.

4578 of 5473 relevant lines covered (83.65%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.77
/hepdata/modules/records/api.py
1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of HEPData.
4
# Copyright (C) 2016 CERN.
5
#
6
# HEPData is free software; you can redistribute it
7
# and/or modify it under the terms of the GNU General Public License as
8
# published by the Free Software Foundation; either version 2 of the
9
# License, or (at your option) any later version.
10
#
11
# HEPData is distributed in the hope that it will be
12
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with HEPData; if not, write to the
18
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19
# MA 02111-1307, USA.
20
#
21
# In applying this license, CERN does not
22
# waive the privileges and immunities granted to it by virtue of its status
23
# as an Intergovernmental Organization or submit itself to any jurisdiction.
24

25
"""API for HEPData-Records."""
26
import os
1✔
27
from collections import OrderedDict
1✔
28
from functools import wraps
1✔
29
import mimetypes
1✔
30
import time
1✔
31

32
from celery import shared_task
1✔
33
from flask import redirect, request, render_template, jsonify, current_app, Response, abort, flash
1✔
34
from flask_login import current_user
1✔
35
from invenio_accounts.models import User
1✔
36
from invenio_db import db
1✔
37
from sqlalchemy import and_, func
1✔
38
from sqlalchemy.orm.exc import NoResultFound
1✔
39
from werkzeug.utils import secure_filename
1✔
40

41
from hepdata.modules.converter import convert_oldhepdata_to_yaml
1✔
42
from hepdata.modules.email.api import send_cookie_email
1✔
43
from hepdata.modules.email.utils import create_send_email_task
1✔
44
from hepdata.modules.permissions.api import user_allowed_to_perform_action
1✔
45
from hepdata.modules.permissions.models import SubmissionParticipant
1✔
46
from hepdata.modules.records.subscribers.api import is_current_user_subscribed_to_record
1✔
47
from hepdata.modules.records.utils.common import decode_string, find_file_in_directory, allowed_file, \
1✔
48
    remove_file_extension, truncate_string, get_record_contents, get_record_by_id, IMAGE_TYPES
49
from hepdata.modules.records.utils.data_processing_utils import process_ctx
1✔
50
from hepdata.modules.records.utils.data_files import get_data_path_for_record, cleanup_old_files
1✔
51
from hepdata.modules.records.utils.json_ld import get_json_ld
1✔
52
from hepdata.modules.records.utils.submission import process_submission_directory, \
1✔
53
    create_data_review, cleanup_submission, clean_error_message_for_display
54
from hepdata.modules.submission.api import get_latest_hepsubmission, get_submission_participants_for_record
1✔
55
from hepdata.modules.records.utils.users import get_coordinators_in_system, has_role
1✔
56
from hepdata.modules.records.utils.workflow import update_action_for_submission_participant
1✔
57
from hepdata.modules.records.utils.yaml_utils import split_files
1✔
58
from hepdata.modules.stats.views import increment, get_count
1✔
59
from hepdata.modules.submission.models import (
1✔
60
    DataReview,
61
    DataSubmission,
62
    HEPSubmission,
63
    RecordVersionCommitMessage,
64
    RelatedRecid,
65
    RelatedTable
66
)
67
from hepdata.utils.file_extractor import extract
1✔
68
from hepdata.utils.miscellaneous import sanitize_html, get_resource_data
1✔
69
from hepdata.utils.users import get_user_from_id
1✔
70
from bs4 import BeautifulSoup
1✔
71
from hepdata_converter_ws_client import Error
1✔
72

73
import tempfile
1✔
74
import shutil
1✔
75

76
import logging
1✔
77
logging.basicConfig()
1✔
78
log = logging.getLogger(__name__)
1✔
79

80
RECORD_PLAIN_TEXT = {
1✔
81
    "passed": "passed review",
82
    "attention": "attention required",
83
    "todo": "to be reviewed"
84
}
85

86
JSON_LD_MIMETYPES = [
1✔
87
    'application/ld+json',
88
    'application/vnd.hepdata.ld+json'
89
]
90

91
def returns_json(f):
    """Decorator wrapping a view's return value in a JSON HTTP Response."""
    @wraps(f)
    def wrapper(*args, **kwargs):
        payload = f(*args, **kwargs)
        return Response(payload, content_type='application/json; charset=utf-8')
    return wrapper
98

99

100
def format_submission(recid, record, version, version_count, hepdata_submission,
                      data_table=None):
    """
    Performs all the processing of the record to be displayed.

    Builds the context dictionary used by the record templates / JSON output.

    :param recid: publication record id.
    :param record: publication record contents dict (may be None).
    :param version: requested submission version, or -1 for the latest.
    :param version_count: number of versions visible to the requesting user.
    :param hepdata_submission: HEPSubmission object; if None an empty
        context is returned.
    :param data_table: optional name of a data table to focus on
        (forwarded to format_tables).
    :return: context dictionary (empty dict when hepdata_submission is None).
    """
    ctx = {}
    if hepdata_submission is not None:

        ctx['site_url'] = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
        ctx['record'] = record
        ctx["version_count"] = version_count

        if version != -1:
            ctx["version"] = version
        else:
            # we get the latest version by default
            ctx["version"] = version_count

        if record is not None:
            # Normalise a comma-separated collaborations string into a list.
            if "collaborations" in record and type(record['collaborations']) is not list:
                collaborations = [x.strip() for x in record["collaborations"].split(",")]
                ctx['record']['collaborations'] = collaborations

            authors = record.get('authors', None)

            create_breadcrumb_text(authors, ctx, record)
            get_commit_message(ctx, recid)

            if authors:
                truncate_author_list(record)

            determine_user_privileges(recid, ctx)

        else:
            # No publication record content: present an empty record and
            # only offer the upload widget.
            ctx['record'] = {}
            determine_user_privileges(recid, ctx)
            ctx['show_upload_widget'] = True
            ctx['show_review_widget'] = False

        # Count of primary reviewers/uploaders attached to this record.
        ctx['participant_count'] = SubmissionParticipant.query \
            .filter_by(publication_recid=recid, status="primary") \
            .filter(SubmissionParticipant.role.in_(["reviewer", "uploader"])) \
            .count()
        ctx['reviewers_notified'] = hepdata_submission.reviewers_notified

        ctx['record']['last_updated'] = hepdata_submission.last_updated
        ctx['record']['hepdata_doi'] = "{0}".format(hepdata_submission.doi)
        # Only append the version suffix when a DOI actually exists.
        if hepdata_submission.doi:
            ctx['record']['hepdata_doi'] += ".v{0}".format(ctx['version'])

        ctx['recid'] = recid
        ctx["status"] = hepdata_submission.overall_status
        ctx['record']['data_abstract'] = sanitize_html(decode_string(hepdata_submission.data_abstract))

        extract_journal_info(record)

        # For unfinished submissions, plain visitors (no review/upload/
        # coordinator privileges) are shown the previous version.
        if hepdata_submission.overall_status != 'finished' and ctx["version_count"] > 0:
            if not (ctx['show_review_widget']
                    or ctx['show_upload_widget']
                    or ctx['is_submission_coordinator_or_admin']):
                # we show the latest approved version.
                ctx["version"] -= 1
                ctx["version_count"] -= 1

        ctx['additional_resources'] = submission_has_resources(hepdata_submission)
        ctx['resources_with_doi'] = []
        for resource in hepdata_submission.resources:
            if resource.doi:
                ctx['resources_with_doi'].append({
                    'filename': os.path.basename(resource.file_location),
                    'description': resource.file_description,
                    'doi': resource.doi
                })

        # query for a related data submission
        data_record_query = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=ctx["version"]).order_by(DataSubmission.id.asc())

        format_tables(ctx, data_record_query, data_table, recid)

        ctx['access_count'] = get_count(recid)
        ctx['mode'] = 'record'
        ctx['coordinator'] = hepdata_submission.coordinator
        ctx['coordinators'] = get_coordinators_in_system()
        # Authors were only needed for breadcrumb/truncation above;
        # drop them from the outgoing context.
        ctx['record'].pop('authors', None)

    return ctx
196

197

198
def format_tables(ctx, data_record_query, data_table, recid):
    """
    Finds all the tables related to a submission and formats
    them for display in the UI or as JSON.

    :param ctx: context dictionary, mutated in place (sets 'watched',
        'data_tables', 'table_id_to_show', 'table_name_to_show').
    :param data_record_query: SQLAlchemy query yielding the submission's
        data table records.
    :param data_table: optional table name to focus on (forwarded to
        process_data_tables).
    :param recid: publication record id.
    :return: None; results are written into ``ctx``.
    """
    first_data_id = -1
    data_table_metadata, first_data_id = process_data_tables(
        ctx, data_record_query, first_data_id, data_table)
    assign_or_create_review_status(data_table_metadata, recid, ctx["version"])
    ctx['watched'] = is_current_user_subscribed_to_record(recid)
    ctx['data_tables'] = list(data_table_metadata.values())
    ctx['table_id_to_show'] = first_data_id
    ctx['table_name_to_show'] = ''
    # NOTE: loop variables are named 'table' (not 'data_table') so they do
    # not shadow this function's 'data_table' parameter, as the original
    # lambdas did.
    matching_tables = [table for table in ctx['data_tables']
                       if table['id'] == first_data_id]
    if matching_tables:
        ctx['table_name_to_show'] = matching_tables[0]['name']
    if 'table' in request.args:
        if request.args['table']:
            table_from_args = request.args['table']
            # Check for table name in list of data tables.
            matching_tables = [table for table in ctx['data_tables']
                               if table['name'] == table_from_args]
            if not matching_tables:
                # Check for processed table name in list of data tables.
                matching_tables = [table for table in ctx['data_tables']
                                   if table['processed_name'] == table_from_args]
            if matching_tables:
                # Set table ID and name to the first matching table.
                ctx['table_id_to_show'] = matching_tables[0]['id']
                ctx['table_name_to_show'] = matching_tables[0]['name']
234

235

236
def format_resource(resource, contents, content_url):
    """
    Gets info about a resource ready to be displayed on the resource's
    landing page.

    :param resource: DataResource object to be displayed
    :param contents: resource file contents (or the marker string 'Binary')
    :param content_url: URL serving the raw resource contents
    :return: context dictionary ready for the template
    :raises ValueError: if no parent publication can be found for the resource
    """
    hepsubmission = HEPSubmission.query.filter(HEPSubmission.resources.any(id=resource.id)).first()
    if not hepsubmission:
        # Look for a DataSubmission mapping to this resource instead.
        datasubmission = DataSubmission.query.filter(DataSubmission.resources.any(id=resource.id)).first()
        if datasubmission:
            hepsubmission = HEPSubmission.query.filter_by(
                publication_recid=datasubmission.publication_recid,
                version=datasubmission.version
            ).first()
        if not hepsubmission:
            # Bug fix: the id was previously passed logging-style as a second
            # argument to ValueError, so it was never interpolated into the
            # message. Interpolate it explicitly.
            raise ValueError(
                "Unable to find publication for resource %d. (Is it a data file?)"
                % resource.id)

    record = get_record_contents(hepsubmission.publication_recid)
    ctx = format_submission(hepsubmission.publication_recid, record,
                            hepsubmission.version, 1, hepsubmission)
    ctx['record_type'] = 'resource'
    ctx['resource'] = resource
    ctx['contents'] = contents
    ctx['content_url'] = content_url
    ctx['resource_url'] = request.url
    ctx['related_publication_id'] = hepsubmission.publication_recid
    ctx['file_mimetype'] = get_resource_mimetype(resource, contents)
    ctx['resource_filename'] = os.path.basename(resource.file_location)
    ctx['resource_filetype'] = f'{resource.file_type} File'
    ctx['related_recids'] = get_record_data_list(hepsubmission, "related")
    ctx['related_to_this_recids'] = get_record_data_list(hepsubmission, "related_to_this")

    # Decide how the front end should present the resource.
    if resource.file_type in IMAGE_TYPES:
        ctx['display_type'] = 'image'
    elif resource.file_location.lower().startswith('http'):
        ctx['display_type'] = 'link'
        ctx['resource_filename'] = 'External Link'
        ctx['resource_filetype'] = 'External Link'
    elif contents == 'Binary':
        ctx['display_type'] = 'binary'
    else:
        ctx['display_type'] = 'code'

    ctx['json_ld'] = get_json_ld(
        ctx,
        hepsubmission.overall_status
    )

    return ctx
290

291

292
def get_resource_mimetype(resource, contents):
    """Guess the MIME type of a resource from its file location.

    When the type cannot be guessed, fall back to
    'application/octet-stream' for binary contents and 'text/plain'
    otherwise.

    :param resource: object with a ``file_location`` attribute.
    :param contents: resource contents (the string 'Binary' marks binary data).
    :return: MIME type string.
    """
    guessed, _ = mimetypes.guess_type(resource.file_location)
    if guessed is not None:
        return guessed
    return 'application/octet-stream' if contents == 'Binary' else 'text/plain'
300

301

302
def should_send_json_ld(request):
    """Determine whether to send JSON-LD instead of HTML for this request.

    :param type request: flask.Request object
    :return: True if request accepts JSON-LD; False otherwise
    :rtype: bool
    """
    # Generator expression instead of a throwaway list: any() can
    # short-circuit on the first accepted JSON-LD mimetype.
    return any(request.accept_mimetypes.quality(m) >= 1 for m in JSON_LD_MIMETYPES)
312

313

314
def get_commit_message(ctx, recid):
    """
    Stores a commit message for the current version in ``ctx`` if present.
    Uses the highest ID of a version-recid pairing (the most recent message).

    :param ctx: context dictionary, mutated in place (may set
        'revision_message').
    :param recid: publication record id.
    """
    try:
        # Select the most recent commit (greatest ID). A single first()
        # replaces the previous count()-then-first() pair, which issued
        # two queries for the same result.
        commit_message = RecordVersionCommitMessage.query \
            .filter_by(version=ctx["version"], recid=recid) \
            .order_by(RecordVersionCommitMessage.id.desc()) \
            .first()

        if commit_message is not None:
            ctx["revision_message"] = {
                'version': commit_message.version,
                'message': commit_message.message}

    except NoResultFound:
        # Kept for safety/compatibility with the original behavior.
        pass
336

337

338
def create_breadcrumb_text(authors, ctx, record):
    """Creates the breadcrumb text for a submission.

    Prefers the record's explicit ``first_author`` full name, falling back
    to the first entry of ``authors``; appends " et al." when there is more
    than one author.

    :param authors: list of author dicts, or None.
    :param ctx: context dict, mutated in place (may set 'breadcrumb_text').
    :param record: publication record dict.
    """
    if "first_author" in record and 'full_name' in record["first_author"] \
            and record["first_author"]["full_name"] is not None:
        ctx['breadcrumb_text'] = record["first_author"]["full_name"]
    elif authors and authors[0] and 'full_name' in authors[0] \
            and authors[0]["full_name"] is not None:
        ctx['breadcrumb_text'] = authors[0]["full_name"]

    # Only append when a name was actually found above: the original
    # unconditionally did `ctx['breadcrumb_text'] += ...`, raising KeyError
    # when neither branch had set the key.
    if authors is not None and len(authors) > 1 and 'breadcrumb_text' in ctx:
        ctx['breadcrumb_text'] += " et al."
349

350

351
def submission_has_resources(hepsubmission):
    """
    Returns whether the submission has any resources attached.

    :param hepsubmission: HEPSubmission object
    :return: bool
    """
    # An empty resource collection is falsy, so bool() gives the same
    # answer as comparing len() against zero.
    return bool(hepsubmission.resources)
359

360

361
def extract_journal_info(record):
    """Set ``record['journal_info']`` based on the record type (in place).

    Thesis records get "<dissertation type>, <institution>", or "PhD Thesis"
    when no dissertation type is present; conference papers get
    "Conference Paper". Other record types are left untouched.

    :param record: publication record dict (may be None or lack 'type').
    """
    if record and 'type' in record:
        if 'thesis' in record['type']:
            # Robustness: tolerate a missing 'dissertation' key instead of
            # raising KeyError (the original indexed record['dissertation']
            # unconditionally).
            dissertation = record.get('dissertation', {})
            if 'type' in dissertation:
                record['journal_info'] = dissertation['type'] + ", " + dissertation[
                    'institution']
            else:
                record['journal_info'] = "PhD Thesis"
        elif 'conference paper' in record['type']:
            record['journal_info'] = "Conference Paper"
371

372

373
def render_record(recid, record, version, output_format, light_mode=False):
    """Render a publication or data record in the requested output format.

    :param recid: record id being requested (a publication record, or a
        data record whose parent publication is then resolved).
    :param record: record contents dict (may be None).
    :param version: requested version, or -1 for the latest allowed version.
    :param output_format: 'html', 'json', 'json_ld', or a converter format
        (e.g. one starting with 'yoda') that is handled via redirects to
        the download endpoints.
    :param light_mode: forwarded to process_ctx for JSON output.
    :return: a Flask response (rendered template, JSON, or redirect);
        aborts with 403 on permission failure or 404 when nothing matches.
    """

    # Count number of all versions and number of finished versions of a publication record.
    version_count_all = HEPSubmission.query.filter(HEPSubmission.publication_recid == recid,
                                                   and_(HEPSubmission.overall_status != 'sandbox',
                                                        HEPSubmission.overall_status != 'sandbox_processing')).count()
    version_count_finished = HEPSubmission.query.filter_by(publication_recid=recid, overall_status='finished').count()

    # Number of versions that a user is allowed to access based on their permissions.
    version_count = version_count_all if user_allowed_to_perform_action(recid) else version_count_finished

    # If version not given explicitly, take to be latest allowed version (or 1 if there are no allowed versions).
    if version == -1:
        version = version_count if version_count else 1

    # Check for a user trying to access a version of a publication record where they don't have permissions.
    if version_count < version_count_all and version == version_count_all:
        # Prompt the user to login if they are not authenticated then redirect, otherwise return a 403 error.
        if not current_user.is_authenticated:
            # Build a percent-encoded 'next' URL preserving version/format
            # (and optional table/rivet args) for after login.
            redirect_url_after_login = '%2Frecord%2F{0}%3Fversion%3D{1}%26format%3D{2}'.format(recid, version, output_format)
            if 'table' in request.args:
                redirect_url_after_login += '%26table%3D{0}'.format(request.args['table'])
            if output_format.startswith('yoda') and 'rivet' in request.args:
                redirect_url_after_login += '%26rivet%3D{0}'.format(request.args['rivet'])
            return redirect('/login/?next={0}'.format(redirect_url_after_login))
        else:
            abort(403)

    hepdata_submission = get_latest_hepsubmission(publication_recid=recid, version=version)

    if hepdata_submission is not None:
        if hepdata_submission.overall_status == 'processing':
            # Submission upload still being processed: show a holding page.
            ctx = {'recid': recid}
            determine_user_privileges(recid, ctx)
            return render_template('hepdata_records/publication_processing.html', ctx=ctx)

        elif not hepdata_submission.overall_status.startswith('sandbox'):
            ctx = format_submission(recid, record, version, version_count, hepdata_submission)
            ctx['record_type'] = 'publication'
            ctx['related_recids'] = get_record_data_list(hepdata_submission, "related")
            ctx['related_to_this_recids'] = get_record_data_list(hepdata_submission, "related_to_this")

            # Bump the record's access counter.
            increment(recid)

            if output_format == 'html' or output_format == 'json_ld':
                ctx['json_ld'] = get_json_ld(
                    ctx,
                    hepdata_submission.overall_status
                )

                if output_format == 'json_ld':
                    status_code = 404 if 'error' in ctx['json_ld'] else 200
                    return jsonify(ctx['json_ld']), status_code

                if output_format == 'html':
                    return render_template('hepdata_records/publication_record.html', ctx=ctx)

            elif 'table' not in request.args:
                # Whole-submission output in a converter format (or raw JSON).
                if output_format == 'json':
                    ctx = process_ctx(ctx, light_mode)
                    return jsonify(ctx)
                elif output_format.startswith('yoda') and 'rivet' in request.args:
                    return redirect('/download/submission/{0}/{1}/{2}/{3}'.format(recid, version, output_format,
                                                                              request.args['rivet']))
                else:
                    return redirect('/download/submission/{0}/{1}/{2}'.format(recid, version, output_format))
            else:
                # Single-table download; prefer the INSPIRE id when available.
                file_identifier = 'ins{}'.format(hepdata_submission.inspire_id) if hepdata_submission.inspire_id else recid
                if output_format.startswith('yoda') and 'rivet' in request.args:
                    return redirect('/download/table/{0}/{1}/{2}/{3}/{4}'.format(
                        file_identifier, request.args['table'].replace('%', '%25').replace('\\', '%5C'), version, output_format,
                        request.args['rivet']))
                else:
                    return redirect('/download/table/{0}/{1}/{2}/{3}'.format(
                        file_identifier, request.args['table'].replace('%', '%25').replace('\\', '%5C'), version, output_format))
        else:
            # Sandbox submissions are not served from this endpoint.
            abort(404)

    elif record is not None:  # this happens when we access an id of a data record
        # in which case, we find the related publication, and
        # make the front end focus on the relevant data table.
        try:
            publication_recid = int(record['related_publication'])
            publication_record = get_record_contents(publication_recid)

            datasubmission = DataSubmission.query.filter_by(associated_recid=recid).one()
            hepdata_submission = get_latest_hepsubmission(publication_recid=publication_recid,
                                                          version=datasubmission.version)

            ctx = format_submission(publication_recid, publication_record,
                                    datasubmission.version, 1, hepdata_submission,
                                    data_table=record['title'])
            ctx['record_type'] = 'table'
            ctx['related_publication_id'] = publication_recid
            ctx['table_name'] = record['title']
            ctx['related_recids'] = get_record_data_list(hepdata_submission, "related")
            ctx['related_to_this_recids'] = get_record_data_list(hepdata_submission, "related_to_this")

            if output_format == 'html' or output_format == 'json_ld':
                ctx['json_ld'] = get_json_ld(
                    ctx,
                    hepdata_submission.overall_status,
                    datasubmission
                )

                if output_format == 'json_ld':
                    status_code = 404 if 'error' in ctx['json_ld'] else 200
                    return jsonify(ctx['json_ld']), status_code

                return render_template('hepdata_records/related_record.html', ctx=ctx)

            elif output_format.startswith('yoda') and 'rivet' in request.args:
                return redirect('/download/table/{0}/{1}/{2}/{3}/{4}'.format(
                    publication_recid, ctx['table_name'].replace('%', '%25').replace('\\', '%5C'), datasubmission.version, output_format,
                    request.args['rivet']))
            else:
                return redirect('/download/table/{0}/{1}/{2}/{3}'.format(
                    publication_recid, ctx['table_name'].replace('%', '%25').replace('\\', '%5C'), datasubmission.version, output_format))

        except Exception as e:
            # NOTE(review): any failure resolving the parent publication is
            # collapsed into a 404 — the original exception is discarded.
            abort(404)
    else:
        abort(404)
496

497

498
def has_upload_permissions(recid, user, is_sandbox=False):
    """
    Returns whether ``user`` may upload to the given record.

    Admins may always upload; for sandbox records the submission
    coordinator may upload; otherwise the user must be a primary uploader
    participant of the record.

    :param recid: publication record id.
    :param user: user object to check.
    :param is_sandbox: whether the record is a sandbox record.
    :return: bool
    """
    if has_role(user, 'admin'):
        return True

    if is_sandbox:
        hepsubmission_record = get_latest_hepsubmission(publication_recid=recid, overall_status='sandbox')
        return hepsubmission_record is not None and hepsubmission_record.coordinator == user.id

    participant = SubmissionParticipant.query.filter_by(user_account=user.id,
        role='uploader', publication_recid=recid, status='primary').first()
    # Explicit bool: the original fell off the end and returned None
    # (rather than False) when no matching participant was found.
    return participant is not None
510

511
def has_coordinator_permissions(recid, user, is_sandbox=False):
    """
    Returns whether ``user`` is an admin or coordinates a submission
    of the given record.

    :param recid: publication record id.
    :param user: user object to check.
    :param is_sandbox: unused; kept for interface compatibility.
    :return: bool
    """
    if has_role(user, 'admin'):
        return True

    matching_submission = HEPSubmission.query.filter_by(
        publication_recid=recid,
        coordinator=user.get_id()).first()
    return matching_submission is not None
519

520

521
def create_new_version(recid, user, notify_uploader=True, uploader_message=None):
    """Reopen a finished submission by creating a new HEPSubmission version.

    :param recid: publication record id.
    :param user: requesting user (not used directly here).
    :param notify_uploader: whether to email primary uploaders a new cookie.
    :param uploader_message: optional message included in the email.
    :return: JSON response with the new version on success, or a 400
        response when the submission is not in the 'finished' state.
    """
    hepsubmission = get_latest_hepsubmission(publication_recid=recid)

    # Only finished submissions can be reopened for a new version.
    if hepsubmission.overall_status != 'finished':
        return jsonify({"message": f"Rec id {recid} is not finished so cannot create a new version"}), 400

    # Reopen the submission to allow for revisions,
    # by creating a new HEPSubmission object.
    reopened_submission = HEPSubmission(publication_recid=recid,
                                        overall_status='todo',
                                        inspire_id=hepsubmission.inspire_id,
                                        coordinator=hepsubmission.coordinator,
                                        version=hepsubmission.version + 1)
    db.session.add(reopened_submission)
    db.session.commit()

    if notify_uploader:
        primary_uploaders = SubmissionParticipant.query.filter_by(
            role='uploader', publication_recid=recid, status='primary'
            )
        record_information = get_record_by_id(recid)
        for participant in primary_uploaders:
            send_cookie_email(participant,
                              record_information,
                              message=uploader_message,
                              version=reopened_submission.version)

    return jsonify({'success': True, 'version': reopened_submission.version})
549

550

551
def process_payload(recid, file, redirect_url, synchronous=False):
    """Process an uploaded file

    :param recid: int
        The id of the record to update
    :param file: file
        The file to process
    :param redirect_url: string
        Redirect URL to record, for use if the upload fails or in synchronous mode
    :param synchronous: bool
        Whether to process asynchronously via celery (default) or immediately (only recommended for tests)
    :return: JSONResponse either containing 'url' (for success cases) or
             'message' (for error cases, which will give a 400 error).
    """

    if file and (allowed_file(file.filename)):
        file_path = save_zip_file(file, recid)
        file_size = os.path.getsize(file_path)
        # Reject oversized uploads with 413 (Payload Too Large).
        UPLOAD_MAX_SIZE = current_app.config.get('UPLOAD_MAX_SIZE', 52000000)
        if file_size > UPLOAD_MAX_SIZE:
            return jsonify({"message":
                "{} too large ({} bytes > {} bytes)".format(
                    file.filename, file_size, UPLOAD_MAX_SIZE)}), 413

        hepsubmission = get_latest_hepsubmission(publication_recid=recid)

        if hepsubmission.overall_status == 'finished':
            # If it is finished and we receive an update,
            # then we need to reopen the submission to allow for revisions,
            # by creating a new HEPSubmission object.
            _rev_hepsubmission = HEPSubmission(publication_recid=recid,
                                               overall_status='todo',
                                               inspire_id=hepsubmission.inspire_id,
                                               coordinator=hepsubmission.coordinator,
                                               version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        # Mark the submission as processing (sandbox records get their own
        # processing state) and persist before dispatching the work.
        previous_status = hepsubmission.overall_status
        hepsubmission.overall_status = 'sandbox_processing' if previous_status == 'sandbox' else 'processing'
        db.session.add(hepsubmission)
        db.session.commit()

        if synchronous:
            process_saved_file(file_path, recid, current_user.get_id(), redirect_url, previous_status)
        else:
            # Dispatch to celery; the uploader is emailed when done.
            process_saved_file.delay(file_path, recid, current_user.get_id(), redirect_url, previous_status)
            flash('File saved. You will receive an email when the file has been processed.', 'info')

        return jsonify({'url': redirect_url.format(recid)})
    else:
        return jsonify({"message": "You must upload a .zip, .tar, .tar.gz or .tgz file" +
                        " (or a .oldhepdata or single .yaml or .yaml.gz file)."}), 400
604

605

606
@shared_task
def process_saved_file(file_path, recid, userid, redirect_url, previous_status):
    """
    Validate and import a previously saved upload for a record, then email
    the uploader about the outcome. Callers either invoke this directly
    (synchronously) or via ``.delay()`` as a Celery task.

    :param file_path: path of the saved archive/YAML file to process.
    :param recid: publication record id of the submission.
    :param userid: id of the user who uploaded the file.
    :param redirect_url: URL template, formatted with *recid* for email links.
    :param previous_status: overall status to restore once processing ends.
    """
    try:
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        # The caller sets the status to '(sandbox_)processing' before
        # dispatching this task; anything else means we were invoked out of
        # order (or twice), so bail out rather than touch the submission.
        if hepsubmission.overall_status != 'processing' and hepsubmission.overall_status != 'sandbox_processing':
            log.error('Record {} is not in a processing state.'.format(recid))
            return

        errors = process_zip_archive(file_path, recid)

        uploader = User.query.get(userid)
        site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')

        # Prefer the participant's registered full name for the email
        # greeting; fall back to the uploader's email address.
        submission_participant = SubmissionParticipant.query.filter_by(
            publication_recid=recid, user_account=userid, role='uploader').first()
        if submission_participant:
            full_name = submission_participant.full_name
        else:
            full_name = uploader.email

        if errors:
            cleanup_submission(recid, hepsubmission.version, [])  # delete all tables if errors
            message_body = render_template('hepdata_theme/email/upload_errors.html',
                                           name=full_name,
                                           article=recid,
                                           redirect_url=redirect_url.format(recid),
                                           errors=errors,
                                           site_url=site_url)

            create_send_email_task(uploader.email,
                                   '[HEPData] Submission {0} upload failed'.format(recid),
                                   message_body)
        else:
            update_action_for_submission_participant(recid, userid, 'uploader')
            message_body = render_template('hepdata_theme/email/upload_complete.html',
                                           name=full_name,
                                           article=recid,
                                           link=redirect_url.format(recid),
                                           site_url=site_url,
                                           overall_status=hepsubmission.overall_status)

            create_send_email_task(uploader.email,
                                   '[HEPData] Submission {0} upload succeeded'.format(recid),
                                   message_body)

        # Reset the status of the submission back to the previous value.
        hepsubmission.overall_status = previous_status
        db.session.add(hepsubmission)
        db.session.commit()

        # Delete any previous upload folders relating to non-final versions
        # of this hepsubmission
        cleanup_old_files(hepsubmission)

    except Exception as e:
        # Reset the status and send error emails, unless we're working
        # asynchronously and celery is about to retry
        # NOTE(review): request.id is presumably None for a direct
        # (synchronous) call, so that path always reports immediately —
        # confirm against the Celery task-request docs.
        if not process_saved_file.request.id \
                or process_saved_file.request.retries >= process_saved_file.max_retries:
            try:
                cleanup_submission(recid, hepsubmission.version, [])
                errors = {
                    "Unexpected error": [{
                        "level": "error",
                        "message": "An unexpected error occurred: {}".format(e)
                    }]
                }
                uploader = User.query.get(userid)
                site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
                message_body = render_template('hepdata_theme/email/upload_errors.html',
                                               name=uploader.email,
                                               article=recid,
                                               redirect_url=redirect_url.format(recid),
                                               errors=errors,
                                               site_url=site_url)

                create_send_email_task(uploader.email,
                                       '[HEPData] Submission {0} upload failed'.format(recid),
                                       message_body)
                log.error("Final attempt of process_saved_file for recid %s failed. Resetting to previous status." % recid)

                # Reset the status of the submission back to the previous value.
                hepsubmission.overall_status = previous_status
                db.session.add(hepsubmission)
                db.session.commit()

            except Exception as ex:
                # Cleanup itself failed; log and swallow so we don't mask
                # the original error with a retry storm.
                log.error("Exception while cleaning up: %s" % ex)

        else:
            log.debug("Celery will retry task, attempt %s" % process_saved_file.request.retries)
            raise e
def save_zip_file(file, id):
    """
    Save an uploaded submission file into the record's data directory.

    :param file: uploaded file object (must provide ``filename`` and
        ``save``, e.g. a werkzeug ``FileStorage``).
    :param id: record id used to build the destination path.
    :return: full path of the saved file.
    """
    filename = secure_filename(file.filename)
    # Timestamped directory so repeated uploads for the same record
    # don't overwrite each other.
    time_stamp = str(int(round(time.time())))
    file_save_directory = get_data_path_for_record(str(id), time_stamp)

    # .oldhepdata files go into their own subdirectory so the converter
    # can locate them later.
    if filename.endswith('.oldhepdata'):
        file_save_directory = os.path.join(file_save_directory, 'oldhepdata')

    # exist_ok avoids the race between the previous exists() check and
    # directory creation.
    os.makedirs(file_save_directory, exist_ok=True)
    file_path = os.path.join(file_save_directory, filename)

    # Use the module logger instead of print for consistency with the
    # rest of this module.
    log.info('Saving file to {}'.format(file_path))
    file.save(file_path)
    return file_path
def process_zip_archive(file_path, id, old_schema=False):
    """
    Unpack and process an uploaded submission file.

    Handles four upload forms: a ``.yaml.gz`` file (decompressed then
    reprocessed), a single ``.yaml`` file (split into a submission
    directory), a zip/tar archive (extracted), or a ``.oldhepdata`` file
    (converted to YAML).

    :param file_path: path of the saved upload.
    :param id: publication record id.
    :param old_schema: passed through to ``process_submission_directory``.
    :return: a dict of errors keyed by category on failure, otherwise the
        result of ``process_submission_directory``.
    :raises ValueError: if an archive cannot be extracted (so Celery can retry).
    """
    (file_save_directory, filename) = os.path.split(file_path)

    if not filename.endswith('.oldhepdata'):
        file_save_directory = os.path.dirname(file_path)
        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])

        if filename.endswith('.yaml.gz'):
            print('Extracting: {} to {}'.format(file_path, file_path[:-3]))
            if not extract(file_path, file_path[:-3]):
                message = clean_error_message_for_display(
                    "{} is not a valid .gz file.".format(file_path),
                    file_save_directory
                )
                return {
                    "Archive file extractor": [{
                        "level": "error",
                        "message": message
                    }]
                }
            # Recurse to process the decompressed single .yaml file.
            # NOTE(review): old_schema is deliberately not propagated here;
            # presumably single-YAML uploads always use the current schema —
            # confirm before changing.
            return process_zip_archive(file_path[:-3], id,
                                       old_schema=False)
        elif filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            error, last_updated = split_files(file_path, submission_temp_path)
            if error:
                message = clean_error_message_for_display(
                    str(error),
                    file_save_directory
                )
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": message
                    }]
                }
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            try:
                unzipped_path = extract(file_path, submission_temp_path)
            except Exception as e:
                # Log the exception and raise it so that celery can retry
                log.exception(f"Unable to extract file {file_path}")
                message = clean_error_message_for_display(
                    "Unable to extract file {}. Please check the file is a valid zip or tar archive file and try again later. Contact info@hepdata.net if problems persist.".format(file_path),
                    file_save_directory
                )
                raise ValueError(message) from e

            if not unzipped_path:
                message = clean_error_message_for_display(
                    "{} is not a valid zip or tar archive file.".format(file_path),
                    file_save_directory
                )
                return {
                    "Archive file extractor": [{
                        "level": "error", "message": message
                    }]
                }

        copy_errors = move_files(submission_temp_path, submission_path)
        if copy_errors:
            return copy_errors

        submission_found = find_file_in_directory(submission_path, lambda x: x == "submission.yaml")

        if not submission_found:
            return {
                "Archive file extractor": [{
                    "level": "error", "message": "No submission.yaml file has been found in the archive."
                }]
            }

        basepath, submission_file_path = submission_found

    else:
        # .oldhepdata upload: convert to YAML first.
        file_dir = os.path.dirname(file_save_directory)
        time_stamp = os.path.split(file_dir)[1]
        result = check_and_convert_from_oldhepdata(os.path.dirname(file_save_directory), id, time_stamp)

        # Check for errors: the converter returns an error dict on failure,
        # otherwise a (basepath, submission_file_path) tuple.
        # isinstance is the idiomatic type check (was `type(result) == dict`).
        if isinstance(result, dict):
            return result
        else:
            basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id,
                                        old_schema=old_schema)
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Check if the input directory contains a .oldhepdata file
    and convert it to YAML if it happens.

    :param input_directory: directory searched for a ``.oldhepdata`` file.
    :param id: record id used to build the converted output path.
    :param timestamp: timestamp component of the output path.
    :return: a dict of errors on failure, otherwise the
        (directory, path) tuple from ``find_file_in_directory`` for the
        converted ``submission.yaml``.
    """
    converted_path = get_data_path_for_record(str(id), timestamp, 'yaml')

    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level": "error",
                "message": "No file with .oldhepdata extension has been found."
            }]
        }

    converted_temp_dir = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
    converted_temp_path = os.path.join(converted_temp_dir, 'yaml')

    try:
        successful = convert_oldhepdata_to_yaml(oldhepdata_found[1], converted_temp_path)
        if not successful:
            # Parse error message from title of HTML file, removing part of string after final "//".
            # Use a context manager so the file handle is closed promptly
            # (the previous bare open() leaked the handle).
            with open(converted_temp_path) as error_file:
                soup = BeautifulSoup(error_file, "html.parser")
            errormsg = soup.title.string.rsplit("//", 1)[0]

    except Error as error:  # hepdata_converter_ws_client.Error
        successful = False
        errormsg = str(error)

    if not successful:
        shutil.rmtree(converted_temp_dir, ignore_errors=True)  # can uncomment when this is definitely working

        return {
            "Converter": [{
                "level": "error",
                "message": "The conversion from oldhepdata "
                           "to the YAML format has not succeeded. "
                           "Error message from converter follows:<br/><br/>" + errormsg
            }]
        }
    else:
        copy_errors = move_files(converted_temp_path, converted_path)
        if copy_errors:
            return copy_errors

    return find_file_in_directory(converted_path, lambda x: x == "submission.yaml")
def move_files(submission_temp_path, submission_path):
    """
    Replace the contents of ``submission_path`` with those of
    ``submission_temp_path``. The temporary directory is always removed
    afterwards, whether the copy succeeded or not.

    :param submission_temp_path: source directory (consumed by this call).
    :param submission_path: destination directory (recreated from scratch).
    :return: ``None`` on success, otherwise a dict describing copy errors.
    """
    print('Copying files from {} to {}'.format(submission_temp_path + '/.', submission_path))
    temp_prefix = submission_temp_path + '/'
    dest_prefix = submission_path + '/'
    try:
        # Wipe any previous destination before copying the new tree across.
        shutil.rmtree(submission_path, ignore_errors=True)
        shutil.copytree(submission_temp_path, submission_path, symlinks=False)
    except shutil.Error as copy_error:
        # copytree aggregates per-file failures as (src, dst, reason)
        # triples; report each one with full filesystem paths stripped so
        # they are never leaked to the user.
        failures = [
            {
                "level": "error",
                "message": 'Invalid file {}: {}'.format(
                    src.replace(temp_prefix, ''),
                    str(reason).replace(temp_prefix, '').replace(dest_prefix, ''))
            }
            for src, _dst, reason in copy_error.args[0]
        ]
        return {
            "Exceptions when copying files": failures
        }
    except Exception as unexpected:
        # Any other failure: strip paths from the message and report it.
        sanitised = str(unexpected).replace(temp_prefix, '').replace(dest_prefix, '')
        return {
            "Exceptions when copying files": [{
                "level": "error",
                "message": sanitised
            }]
        }
    finally:
        shutil.rmtree(submission_temp_path, ignore_errors=True)
def query_messages_for_data_review(data_review_record, messages):
    """
    Append the review messages of *data_review_record* to *messages*,
    newest first, and return the same list.

    :param data_review_record: DataReview object whose messages are read.
    :param messages: list mutated in place with message dicts.
    :return: the *messages* list, for convenience.
    """
    review_messages = data_review_record.messages
    if review_messages:
        # Highest id first; note this sorts the stored list in place.
        review_messages.sort(key=lambda msg: msg.id, reverse=True)
        for msg in review_messages:
            author = get_user_from_id(msg.user)
            messages.append({
                "message": msg.message,
                "user": author.email,
                "post_time": msg.creation_date,
            })
    return messages
def assign_or_create_review_status(data_table_metadata, publication_recid,
                                   version):
    """
    If a review already exists, it will be attached to the current data record.
    If a review does not exist for a data table, it will be created.

    :param data_table_metadata: the metadata describing the main table.
    :param publication_recid: publication record id
    :param version: submission version whose reviews are assigned/created.
    """
    data_review_query = DataReview.query.filter_by(
        publication_recid=publication_recid, version=version)
    # this method should also create all the DataReviews for data_tables that
    # are not currently present to avoid
    # only creating data reviews when the review is clicked explicitly.
    assigned_tables = []
    if data_review_query.count() > 0:
        data_review_records = data_review_query.all()

        for data_review in data_review_records:
            # Only annotate tables still present in the metadata dict;
            # reviews for tables no longer listed are skipped.
            if data_review.data_recid in data_table_metadata:
                data_table_metadata[data_review.data_recid][
                    "review_flag"] = data_review.status
                data_table_metadata[data_review.data_recid]["review_status"] = \
                    RECORD_PLAIN_TEXT[data_review.status]
                # Boolean flag: does this review have any messages?
                data_table_metadata[data_review.data_recid]["messages"] = len(
                    data_review.messages) > 0
                assigned_tables.append(data_review.data_recid)

    # now create the missing data reviews
    for data_table_id in data_table_metadata:
        if data_table_id not in assigned_tables:
            data_record = create_data_review(
                data_table_id, publication_recid, version=version)
            data_table_metadata[data_table_id][
                "review_flag"] = data_record.status
            data_table_metadata[data_table_id]["review_status"] = \
                RECORD_PLAIN_TEXT[data_record.status]
def determine_user_privileges(recid, ctx):
    """
    Populate *ctx* with flags describing what the current user may do with
    record *recid*: review widget, upload widget, coordinator and admin
    rights. All flags default to False for anonymous users.

    :param recid: publication record id.
    :param ctx: dict mutated in place with the privilege flags.
    """
    # show_review_area = not show_upload_area
    ctx['show_review_widget'] = False
    ctx['show_upload_widget'] = False
    ctx['is_submission_coordinator_or_admin'] = False
    ctx['is_admin'] = False

    if current_user.is_authenticated:
        user_id = current_user.get_id()
        participant_records = get_submission_participants_for_record(recid, user_account=user_id)

        for participant_record in participant_records:
            if participant_record is not None:
                # Only 'primary' participants get the respective widgets.
                if participant_record.role == 'reviewer' and participant_record.status == 'primary':
                    ctx['show_review_widget'] = True

                if participant_record.role == 'uploader' and participant_record.status == 'primary':
                    ctx['show_upload_widget'] = True

        user = User.query.get(current_user.get_id())
        if has_role(user, 'admin'):
            ctx['is_submission_coordinator_or_admin'] = True
            ctx['is_admin'] = True
        else:
            # Not an admin: check whether this user coordinates any
            # submission of this record.
            matching_records = HEPSubmission.query.filter_by(
                publication_recid=recid,
                coordinator=current_user.get_id()).count()

            if matching_records > 0:
                ctx['is_submission_coordinator_or_admin'] = True

        # Coordinators and admins may always upload.
        ctx['show_upload_widget'] = (
            ctx['show_upload_widget'] or ctx[
                'is_submission_coordinator_or_admin'])
def process_data_tables(ctx, data_record_query, first_data_id,
                        data_table=None):
    """
    Build the metadata dict for a record's data tables and decide whether
    the upload area should be shown instead.

    :param ctx: template context dict; ``show_upload_area`` is set here and
        ``show_upload_widget`` is read from it.
    :param data_record_query: query yielding the record's table submissions.
    :param first_data_id: id of the table to select initially; -1 means
        "pick the first table found".
    :param data_table: optional table name; if it matches a table, that
        table becomes the selected one.
    :return: (data_table_metadata, first_data_id) tuple.
    """
    data_table_metadata = OrderedDict()
    ctx['show_upload_area'] = False

    # Evaluate the count once: the original called .count() in both
    # branches of the condition, issuing two COUNT queries.
    record_count = data_record_query.count()

    if ctx['show_upload_widget'] and record_count == 0:
        ctx['show_upload_area'] = True
    elif record_count > 0:
        record_submissions = data_record_query.all()
        for submission_record in record_submissions:
            # Whitespace-free name used for anchors/identifiers.
            processed_name = "".join(submission_record.name.split())
            data_table_metadata[submission_record.id] = {
                "id": submission_record.id, "processed_name": processed_name,
                "name": submission_record.name,
                "location": submission_record.location_in_publication,
                # Generate resource metadata
                "resources": get_resource_data(submission_record),
                "doi": submission_record.doi,
                "description": sanitize_html(
                    truncate_string(submission_record.description, 20),
                    tags={},
                    strip=True
                )
            }

            if first_data_id == -1:
                first_data_id = submission_record.id

            # An explicitly requested table name overrides the default.
            if data_table:
                if submission_record.name == data_table:
                    first_data_id = submission_record.id

    return data_table_metadata, first_data_id
def truncate_author_list(record, length=10):
    """Trim the record's author list in place to at most *length* entries.

    :param record: dict with an ``authors`` list.
    :param length: maximum number of authors to keep (default 10).
    """
    full_author_list = record['authors']
    record['authors'] = full_author_list[:length]
def get_all_ids(index=None, id_field='recid', last_updated=None, latest_first=False):
    """Get all record or inspire ids of publications in the search index

    :param index: name of index to use.
    :param id_field: id type to return. Should be 'recid' or 'inspire_id'
    :param last_updated: if given, only ids updated at/after this time.
    :param latest_first: sort by last_updated descending instead of recid.
    :return: list of integer ids
    """
    if id_field not in ('recid', 'inspire_id'):
        raise ValueError('Invalid ID field %s' % id_field)

    db_col = (HEPSubmission.publication_recid
              if id_field == 'recid'
              else HEPSubmission.inspire_id)

    # Only finished (published) submissions are considered.
    query = db.session.query(db_col) \
        .filter(HEPSubmission.overall_status == 'finished')

    if last_updated:
        query = query.filter(HEPSubmission.last_updated >= last_updated)

    if not latest_first:
        query = query.order_by(HEPSubmission.publication_recid).distinct()
        return [int(row[0]) for row in query.all()]

    # Ordering by last_updated means SQL DISTINCT would apply across both
    # columns, so deduplicate in Python while preserving the sort order.
    query = query.order_by(HEPSubmission.last_updated.desc())
    ordered_unique = dict.fromkeys(row[0] for row in query.all())
    return [int(value) for value in ordered_unique]
def get_related_hepsubmissions(submission):
    """
    Queries the database for all HEPSubmission objects contained in
    this object's related record ID list.
    (All submissions this one is relating to)

    :param submission: HEPSubmission whose related record ids are resolved.
    :return: [list] A list of HEPSubmission objects
    """
    # Resolve each related record id, dropping ids with no submission.
    candidates = (
        get_latest_hepsubmission(publication_recid=link.related_recid)
        for link in submission.related_recids
    )
    return [candidate for candidate in candidates if candidate]
def get_related_to_this_hepsubmissions(submission):
    """
    Queries the database for all records in the RelatedRecId table
    that have THIS record's id as a related record.
    Then returns the HEPSubmission object marked in the RelatedRecid table.
    Returns only submissions marked as 'finished'

    :param submission: HEPSubmission whose inbound relations are looked up.
    :return: [list] List containing related records.
    """

    # We use a subquery to get the max version/recid pairing
    subquery = (
        HEPSubmission.query
        .with_entities(
            HEPSubmission.publication_recid,
            func.max(HEPSubmission.version).label('max_version')
        )
        .group_by(HEPSubmission.publication_recid)
        .subquery()
    )

    # Use result of subquery to join and select the max submission where related
    related_submissions = (
        HEPSubmission.query
        .join(subquery, (HEPSubmission.publication_recid == subquery.c.publication_recid) & (
                HEPSubmission.version == subquery.c.max_version))
        .join(RelatedRecid, RelatedRecid.this_recid == HEPSubmission.publication_recid)
        .filter(RelatedRecid.related_recid == submission.publication_recid)
        .all()
    )

    # Set comprehension to determine unique IDs where the max version object is 'finished'
    unique_recids = {sub.publication_recid for sub in related_submissions if sub.overall_status == 'finished'}

    # Re-fetch via get_latest_hepsubmission so callers receive the latest
    # finished submission object for each unique record id.
    return [get_latest_hepsubmission(publication_recid=recid, overall_status='finished') for recid in unique_recids]
def get_related_datasubmissions(data_submission):
    """
    Queries the database for all DataSubmission objects contained in
    this object's related DOI list.
    (All submissions this one is relating to)

    :param data_submission: The datasubmission object to find related data for.
    :return: [list] A list of DataSubmission objects
    """
    matches = []
    for link in data_submission.related_tables:
        # Look up the first data submission carrying the related DOI.
        match = (
            DataSubmission.query
            .filter(DataSubmission.doi == link.related_doi)
            .join(HEPSubmission, HEPSubmission.publication_recid == DataSubmission.publication_recid)
            .first()
        )
        # DOIs that don't resolve to a submission are skipped.
        if match:
            matches.append(match)
    return matches
def get_related_to_this_datasubmissions(data_submission):
    """
        Get the DataSubmission Objects with a RelatedTable entry
        where this doi is referred to in related_doi.
        Only returns where associated HEPSubmission object is `finished`,
        OR where it is within the same HEPSubmission

        :param data_submission: The datasubmission to find the related entries for.
        :return: [List] List of DataSubmission objects.
    """
    related_submissions = (
        DataSubmission.query
        .join(RelatedTable, RelatedTable.table_doi == DataSubmission.doi)
        .join(HEPSubmission, (HEPSubmission.publication_recid == DataSubmission.publication_recid))
        # Keep only data submissions belonging to the latest version of
        # their publication record.
        .group_by(DataSubmission.id)
        .having(func.max(HEPSubmission.version) == DataSubmission.version)
        .filter(RelatedTable.related_doi == data_submission.doi)
        # If finished, OR is part of the same submission
        .filter(
            (HEPSubmission.overall_status == 'finished') | (
                HEPSubmission.publication_recid == data_submission.publication_recid))
        .all()
    )
    return related_submissions
def get_record_data_list(record, data_type):
    """
    Generates a dictionary (title/recid) from a list of record IDs.
    This must be done as the record contents are not stored within the
    hepsubmission object.

    :param record: The record used for the query.
    :param data_type: Either the related, or related to this data.
    :return: [list] A list of dictionary objects containing record ID and title pairs
    """
    # Selects the related data based on the data_type flag; anything else
    # yields an empty list.
    if data_type == "related":
        submissions = get_related_hepsubmissions(record)
    elif data_type == "related_to_this":
        submissions = get_related_to_this_hepsubmissions(record)
    else:
        submissions = []

    return [
        {
            "recid": sub.publication_recid,
            "title": get_record_contents(sub.publication_recid)["title"],
            "version": sub.version,
        }
        for sub in submissions
    ]
def get_table_data_list(table, data_type):
    """
    Generates a list of general information (name, doi, desc) dictionaries of related DataSubmission objects.
    Will either use the related data list (get_related_data_submissions)
    OR the `related to this` list (generated by get_related_to_this_datasubmissions)

    :param table: The DataSubmission object used for querying.
    :param data_type: The flag to decide which relation data to use.
    :return: [list] A list of dictionaries with the name, doi and description of the object.
    """
    # Selects the related data based on the data_type flag.
    # Initialise to [] so an unrecognised data_type returns an empty list
    # instead of raising NameError (matches get_record_data_list).
    data = []
    if data_type == "related":
        data = get_related_datasubmissions(table)
    elif data_type == "related_to_this":
        data = get_related_to_this_datasubmissions(table)

    record_data = []
    if data:
        for datum in data:
            record_data.append({
                "name": datum.name,
                "doi": datum.doi,
                "description": datum.description
            })
    return record_data
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc