HEPData / hepdata / 21246856953

22 Jan 2026 11:32AM UTC coverage: 84.743% (+0.07%) from 84.673%

Pull Request #898: Observer role
Merge 1a16115c6 into 7db713a49 (github / web-flow)

145 of 182 new or added lines in 8 files covered. (79.67%)
194 existing lines in 3 files now uncovered.
4860 of 5735 relevant lines covered (84.74%)
0.85 hits per line
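
The headline figures are simple ratios of the line counts above. A minimal sketch (not part of the Coveralls output, and assuming each covered line registers a single hit) of how they are derived:

    covered, relevant = 4860, 5735            # overall counts reported above
    new_covered, new_total = 145, 182         # new or added lines in this pull request
    print(round(100 * covered / relevant, 3))        # 84.743 -> overall coverage
    print(round(100 * new_covered / new_total, 2))   # 79.67  -> coverage of new lines
    print(round(covered / relevant, 2))              # 0.85   -> approximate hits per line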

Source File: /hepdata/modules/records/api.py (87.57% covered)
1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of HEPData.
4
# Copyright (C) 2016 CERN.
5
#
6
# HEPData is free software; you can redistribute it
7
# and/or modify it under the terms of the GNU General Public License as
8
# published by the Free Software Foundation; either version 2 of the
9
# License, or (at your option) any later version.
10
#
11
# HEPData is distributed in the hope that it will be
12
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with HEPData; if not, write to the
18
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19
# MA 02111-1307, USA.
20
#
21
# In applying this license, CERN does not
22
# waive the privileges and immunities granted to it by virtue of its status
23
# as an Intergovernmental Organization or submit itself to any jurisdiction.
24

25
"""API for HEPData-Records."""
26
import os
1✔
27
from collections import OrderedDict
1✔
28
from functools import wraps
1✔
29
import mimetypes
1✔
30
import time
1✔
31

32
from celery import shared_task
1✔
33
from flask import redirect, request, render_template, jsonify, current_app, Response, abort, flash, url_for
1✔
34
from flask_login import current_user
1✔
35
from invenio_accounts.models import User
1✔
36
from invenio_db import db
1✔
37
from sqlalchemy import and_, func
1✔
38
from sqlalchemy.orm.exc import NoResultFound
1✔
39
from werkzeug.utils import secure_filename
1✔
40

41
from hepdata.modules.converter import convert_oldhepdata_to_yaml
1✔
42
from hepdata.modules.email.api import send_cookie_email, notify_submission_created
1✔
43
from hepdata.modules.email.utils import create_send_email_task
1✔
44
from hepdata.modules.permissions.api import user_allowed_to_perform_action, verify_observer_key
1✔
45
from hepdata.modules.permissions.models import SubmissionParticipant
1✔
46
from hepdata.modules.records.subscribers.api import is_current_user_subscribed_to_record
1✔
47
from hepdata.modules.records.utils.common import decode_string, find_file_in_directory, allowed_file, \
1✔
48
    remove_file_extension, truncate_string, get_record_contents, get_record_by_id, IMAGE_TYPES
49
from hepdata.modules.records.utils.data_processing_utils import process_ctx
1✔
50
from hepdata.modules.records.utils.data_files import get_data_path_for_record, cleanup_old_files
1✔
51
from hepdata.modules.records.utils.json_ld import get_json_ld
1✔
52
from hepdata.modules.records.utils.submission import process_submission_directory, \
1✔
53
    create_data_review, cleanup_submission, clean_error_message_for_display
54
from hepdata.modules.submission.api import get_latest_hepsubmission, get_submission_participants_for_record, \
1✔
55
    get_or_create_submission_observer
56
from hepdata.modules.records.utils.users import get_coordinators_in_system, has_role
1✔
57
from hepdata.modules.records.utils.workflow import update_action_for_submission_participant
1✔
58
from hepdata.modules.records.utils.yaml_utils import split_files
1✔
59
from hepdata.modules.stats.views import increment, get_count
1✔
60
from hepdata.modules.submission.models import (
1✔
61
    DataReview,
62
    DataSubmission,
63
    HEPSubmission,
64
    RecordVersionCommitMessage,
65
    RelatedRecid,
66
    RelatedTable,
67
    SubmissionObserver
68
)
69
from hepdata.utils.file_extractor import extract
1✔
70
from hepdata.utils.miscellaneous import sanitize_html, get_resource_data
1✔
71
from hepdata.utils.users import get_user_from_id
1✔
72
from bs4 import BeautifulSoup
1✔
73
from hepdata_converter_ws_client import Error
1✔
74

75
import tempfile
1✔
76
import shutil
1✔
77

78
import logging
1✔
79
logging.basicConfig()
1✔
80
log = logging.getLogger(__name__)
1✔
81

82
RECORD_PLAIN_TEXT = {
1✔
83
    "passed": "passed review",
84
    "attention": "attention required",
85
    "todo": "to be reviewed"
86
}
87

88
JSON_LD_MIMETYPES = [
1✔
89
    'application/ld+json',
90
    'application/vnd.hepdata.ld+json'
91
]
92

93
def returns_json(f):
1✔
94
    @wraps(f)
1✔
95
    def decorated_function(*args, **kwargs):
1✔
96
        r = f(*args, **kwargs)
1✔
97
        return Response(r, content_type='application/json; charset=utf-8')
1✔
98

99
    return decorated_function
1✔
100

101

102
def format_submission(recid, record, version, version_count, hepdata_submission,
1✔
103
                      data_table=None, observer_view=None):
104
    """
105
    Performs all the processing of the record to be displayed.
106

107
    :param recid: record id of the publication being displayed.
108
    :param record: dictionary of publication metadata.
109
    :param version: submission version to display (-1 selects the latest).
110
    :param version_count: number of versions available to the current user.
111
    :param hepdata_submission: HEPSubmission object for this record.
112
    :param data_table: name of a data table to highlight, if any.
113
    :param observer_view: True when the record is viewed via a verified observer key.
114
    :return: context dictionary used to render the record.
115
    """
116
    ctx = {}
1✔
117
    if hepdata_submission is not None:
1✔
118

119
        ctx['site_url'] = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
1✔
120
        ctx['record'] = record
1✔
121
        ctx["version_count"] = version_count
1✔
122

123
        if version != -1:
1✔
124
            ctx["version"] = version
1✔
125
        else:
126
            # we get the latest version by default
UNCOV
127
            ctx["version"] = version_count
×
128

129
        if record is not None:
1✔
130
            if "collaborations" in record and type(record['collaborations']) is not list:
1✔
131
                collaborations = [x.strip() for x in record["collaborations"].split(",")]
×
UNCOV
132
                ctx['record']['collaborations'] = collaborations
×
133

134
            authors = record.get('authors', None)
1✔
135

136
            create_breadcrumb_text(authors, ctx, record)
1✔
137
            get_commit_message(ctx, recid)
1✔
138

139
            if authors:
1✔
140
                truncate_author_list(record)
1✔
141

142
            determine_user_privileges(recid, ctx)
1✔
143

144
        else:
145
            ctx['record'] = {}
1✔
146
            determine_user_privileges(recid, ctx)
1✔
147
            ctx['show_upload_widget'] = True
1✔
148
            ctx['show_review_widget'] = False
1✔
149

150
        ctx['participant_count'] = SubmissionParticipant.query \
1✔
151
            .filter_by(publication_recid=recid, status="primary") \
152
            .filter(SubmissionParticipant.role.in_(["reviewer", "uploader"])) \
153
            .count()
154
        ctx['reviewers_notified'] = hepdata_submission.reviewers_notified
1✔
155

156
        ctx['record']['last_updated'] = hepdata_submission.last_updated
1✔
157
        ctx['record']['hepdata_doi'] = "{0}".format(hepdata_submission.doi)
1✔
158
        if hepdata_submission.doi:
1✔
159
            ctx['record']['hepdata_doi'] += ".v{0}".format(ctx['version'])
1✔
160

161
        ctx['recid'] = recid
1✔
162
        ctx["status"] = hepdata_submission.overall_status
1✔
163
        ctx['record']['data_abstract'] = sanitize_html(decode_string(hepdata_submission.data_abstract))
1✔
164

165
        extract_journal_info(record)
1✔
166

167
        if hepdata_submission.overall_status != 'finished' and ctx["version_count"] > 0:
1✔
168
            if not (ctx['show_review_widget']
1✔
169
                    or ctx['show_upload_widget']
170
                    or ctx['is_submission_coordinator_or_admin']):
171

NEW
172
                if not observer_view:
×
173
                    # we show the latest approved version.
NEW
UNCOV
174
                    ctx["version"] -= 1
×
NEW
UNCOV
175
                    ctx["version_count"] -= 1
×
176
                else:
NEW
UNCOV
177
                    ctx["version_count"] += 1
×
178

179

180
        ctx['additional_resources'] = submission_has_resources(hepdata_submission)
1✔
181
        ctx['resources_with_doi'] = []
1✔
182
        for resource in hepdata_submission.resources:
1✔
183
            if resource.doi:
1✔
184
                ctx['resources_with_doi'].append({
1✔
185
                    'filename': os.path.basename(resource.file_location),
186
                    'description': resource.file_description,
187
                    'doi': resource.doi
188
                })
189

190
        # query for a related data submission
191
        data_record_query = DataSubmission.query.filter_by(
1✔
192
            publication_recid=recid,
193
            version=ctx["version"]).order_by(DataSubmission.id.asc())
194

195
        format_tables(ctx, data_record_query, data_table, recid)
1✔
196

197
        ctx['access_count'] = get_count(recid)
1✔
198
        ctx['mode'] = 'record'
1✔
199
        ctx['coordinator'] = hepdata_submission.coordinator
1✔
200
        ctx['coordinators'] = get_coordinators_in_system()
1✔
201
        ctx['record'].pop('authors', None)
1✔
202

203
    return ctx
1✔
204

205

206
def format_tables(ctx, data_record_query, data_table, recid):
1✔
207
    """
208
    Finds all the tables related to a submission and formats
209
    them for display in the UI or as JSON.
210

211
    :return: None; results are added to the ctx dictionary.
212
    """
213
    first_data_id = -1
1✔
214
    data_table_metadata, first_data_id = process_data_tables(
1✔
215
        ctx, data_record_query, first_data_id, data_table)
216
    assign_or_create_review_status(data_table_metadata, recid, ctx["version"])
1✔
217
    ctx['watched'] = is_current_user_subscribed_to_record(recid)
1✔
218
    ctx['data_tables'] = list(data_table_metadata.values())
1✔
219
    ctx['table_id_to_show'] = first_data_id
1✔
220
    ctx['table_name_to_show'] = ''
1✔
221
    matching_tables = list(filter(
1✔
222
        lambda data_table: data_table['id'] == first_data_id,
223
        ctx['data_tables']))
224
    if matching_tables:
1✔
225
        ctx['table_name_to_show'] = matching_tables[0]['name']
1✔
226
    if 'table' in request.args:
1✔
227
        if request.args['table']:
1✔
228
            table_from_args = request.args['table']
1✔
229
            # Check for table name in list of data tables.
230
            matching_tables = list(filter(
1✔
231
                lambda data_table: data_table['name'] == table_from_args,
232
                ctx['data_tables']))
233
            if not matching_tables:
1✔
234
                # Check for processed table name in list of data tables.
235
                matching_tables = list(filter(
1✔
236
                    lambda data_table: data_table['processed_name'] == table_from_args,
237
                    ctx['data_tables']))
238
            if matching_tables:
1✔
239
                # Set table ID and name to the first matching table.
240
                ctx['table_id_to_show'] = matching_tables[0]['id']
1✔
241
                ctx['table_name_to_show'] = matching_tables[0]['name']
1✔
242

243

244
def format_resource(resource, contents, content_url):
1✔
245
    """
246
    Gets info about a resource ready to be displayed on the resource's
247
    landing page
248

249
    :param resource: DataResource object to be displayed
250
    :param contents: Resource file contents
    :param content_url: URL for the resource contents
251

252
    :return: context dictionary ready for the template
253
    """
254
    hepsubmission = HEPSubmission.query.filter(HEPSubmission.resources.any(id=resource.id)).first()
1✔
255
    if not hepsubmission:
1✔
UNCOV
256
        datasubmission = DataSubmission.query.filter(DataSubmission.resources.any(id=resource.id)).first()
×
UNCOV
257
        if datasubmission:
×
258
            hepsubmission = HEPSubmission.query.filter_by(
×
259
                publication_recid=datasubmission.publication_recid,
260
                version=datasubmission.version
261
            ).first()
UNCOV
262
        if not hepsubmission:
×
263
            # Look for DataSubmission mapping to this resource
UNCOV
264
                raise ValueError("Unable to find publication for resource %d. (Is it a data file?)" % resource.id)
×
265

266
    record = get_record_contents(hepsubmission.publication_recid)
1✔
267
    ctx = format_submission(hepsubmission.publication_recid, record,
1✔
268
                            hepsubmission.version, 1, hepsubmission)
269
    ctx['record_type'] = 'resource'
1✔
270
    ctx['resource'] = resource
1✔
271
    ctx['contents'] = contents
1✔
272
    ctx['content_url'] = content_url
1✔
273
    ctx['resource_url'] = request.url
1✔
274
    ctx['related_publication_id'] = hepsubmission.publication_recid
1✔
275
    ctx['file_mimetype'] = get_resource_mimetype(resource, contents)
1✔
276
    ctx['resource_filename'] = os.path.basename(resource.file_location)
1✔
277
    ctx['resource_filetype'] = f'{resource.file_type} File'
1✔
278
    ctx['related_recids'] = get_record_data_list(hepsubmission, "related")
1✔
279
    ctx['related_to_this_recids'] = get_record_data_list(hepsubmission, "related_to_this")
1✔
280

281
    if resource.file_type in IMAGE_TYPES:
1✔
282
        ctx['display_type'] = 'image'
×
283
    elif resource.file_location.lower().startswith('http'):
1✔
284
        ctx['display_type'] = 'link'
×
UNCOV
285
        ctx['resource_filename'] = 'External Link'
×
UNCOV
286
        ctx['resource_filetype'] = 'External Link'
×
287
    elif contents == 'Binary':
1✔
UNCOV
288
        ctx['display_type'] = 'binary'
×
289
    else:
290
        ctx['display_type'] = 'code'
1✔
291

292
    ctx['json_ld'] = get_json_ld(
1✔
293
        ctx,
294
        hepsubmission.overall_status
295
    )
296

297
    return ctx
1✔
298

299

300
def get_resource_mimetype(resource, contents):
1✔
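    """Guess the MIME type of a resource from its file location, falling back to application/octet-stream for binary contents or text/plain otherwise."""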
301
    file_mimetype = mimetypes.guess_type(resource.file_location)[0]
1✔
302
    if file_mimetype is None:
1✔
303
        if contents == 'Binary':
1✔
304
            file_mimetype = 'application/octet-stream'
1✔
305
        else:
306
            file_mimetype = 'text/plain'
1✔
307
    return file_mimetype
1✔
308

309

310
def should_send_json_ld(request):
1✔
311
    """Determine whether to send JSON-LD instead of HTML for this request
312

313
    :param type request: flask.Request object
314
    :return: True if request accepts JSON-LD; False otherwise
315
    :rtype: bool
316

317
    """
318
    # Determine whether to send JSON-LD
319
    return any([request.accept_mimetypes.quality(m) >= 1 for m in JSON_LD_MIMETYPES])
1✔
320

321

322
def get_commit_message(ctx, recid):
1✔
323
    """
324
    Adds the commit message for the current version to the context, if present.
325
    Uses the most recent message (highest ID) for the version-recid pairing.
326

327
    :param ctx:
328
    :param recid:
329
    """
330
    try:
1✔
331
        # Select the most recent commit (greatest ID)
332
        commit_message_query = RecordVersionCommitMessage.query \
1✔
333
            .filter_by(version=ctx["version"], recid=recid) \
334
            .order_by(RecordVersionCommitMessage.id.desc())
335

336
        if commit_message_query.count() > 0:
1✔
337
            commit_message = commit_message_query.first()
1✔
338
            ctx["revision_message"] = {
1✔
339
                'version': commit_message.version,
340
                'message': commit_message.message}
341

UNCOV
342
    except NoResultFound:
×
UNCOV
343
        pass
×
344

345

346
def create_breadcrumb_text(authors, ctx, record):
1✔
347
    """Creates the breadcrumb text for a submission."""
348
    if "first_author" in record and 'full_name' in record["first_author"] \
1✔
349
            and record["first_author"]["full_name"] is not None:
350
        ctx['breadcrumb_text'] = record["first_author"]["full_name"]
1✔
351
    elif authors and authors[0] and 'full_name' in authors[0] \
1✔
352
            and authors[0]["full_name"] is not None:
353
        ctx['breadcrumb_text'] = authors[0]["full_name"]
1✔
354

355
    if authors is not None and len(authors) > 1:
1✔
356
        ctx['breadcrumb_text'] += " et al."
1✔
357

358

359
def submission_has_resources(hepsubmission):
1✔
360
    """
361
    Returns whether the submission has resources attached.
362

363
    :param hepsubmission: HEPSubmission object
364
    :return: bool
365
    """
366
    return len(hepsubmission.resources) > 0
1✔
367

368

369
def extract_journal_info(record):
1✔
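    """Set record['journal_info'] for thesis and conference paper record types."""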
370
    if record and 'type' in record:
1✔
371
        if 'thesis' in record['type']:
1✔
372
            if 'type' in record['dissertation']:
×
UNCOV
373
                record['journal_info'] = record['dissertation']['type'] + ", " + record['dissertation'][
×
374
                    'institution']
375
            else:
UNCOV
376
                record['journal_info'] = "PhD Thesis"
×
377
        elif 'conference paper' in record['type']:
1✔
UNCOV
378
            record['journal_info'] = "Conference Paper"
×
379

380

381
def render_record(recid, record, version, output_format, light_mode=False, observer_key=None):
1✔
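    """Render a publication or data record in the requested output format, enforcing version access permissions (or a verified observer key) and redirecting to the converter for downloadable formats."""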
382

383
    # Count number of all versions and number of finished versions of a publication record.
384
    version_count_all = HEPSubmission.query.filter(HEPSubmission.publication_recid == recid,
1✔
385
                                                   and_(HEPSubmission.overall_status != 'sandbox',
386
                                                        HEPSubmission.overall_status != 'sandbox_processing')).count()
387
    version_count_finished = HEPSubmission.query.filter_by(publication_recid=recid, overall_status='finished').count()
1✔
388

389
    # Number of versions that a user is allowed to access based on their permissions.
390
    version_count = version_count_all if user_allowed_to_perform_action(recid) else version_count_finished
1✔
391
    key_verified = verify_observer_key(recid, observer_key)
1✔
392

393
    # If version not given explicitly, take it to be the latest allowed version (or 1 if there are no allowed versions).
394
    # Unless we have a verified observer key, in which case we select the latest version of ANY status.
395
    if version == -1:
1✔
396
        if key_verified:
1✔
397
            version = version_count_all
1✔
398
        else:
399
            version = version_count if version_count else 1
1✔
400

401
    # We skip the version check if the access key matches
402
    if not key_verified:
1✔
403
        # Check for a user trying to access a version of a publication record where they don't have permissions.
404
        if version_count < version_count_all and version == version_count_all:
1✔
405
            # Prompt the user to log in if they are not authenticated, then redirect; otherwise return a 403 error.
406
            if not current_user.is_authenticated:
1✔
407
                redirect_url_after_login = '%2Frecord%2F{0}%3Fversion%3D{1}%26format%3D{2}'.format(recid, version, output_format)
1✔
408
                if 'table' in request.args:
1✔
NEW
UNCOV
409
                    redirect_url_after_login += '%26table%3D{0}'.format(request.args['table'])
×
410
                if output_format.startswith('yoda') and 'rivet' in request.args:
1✔
NEW
UNCOV
411
                    redirect_url_after_login += '%26rivet%3D{0}'.format(request.args['rivet'])
×
412
                if output_format.startswith('yoda') and 'qualifiers' in request.args:
1✔
NEW
413
                    redirect_url_after_login += '%26qualifiers%3D{0}'.format(request.args['qualifiers'])
×
414
                return redirect('/login/?next={0}'.format(redirect_url_after_login))
1✔
415
            else:
NEW
UNCOV
416
                abort(403)
×
417

418
    hepdata_submission = get_latest_hepsubmission(publication_recid=recid, version=version)
1✔
419

420
    if hepdata_submission is not None:
1✔
421
        if hepdata_submission.overall_status == 'processing':
1✔
422
            ctx = {'recid': recid}
×
423
            determine_user_privileges(recid, ctx)
×
424
            return render_template('hepdata_records/publication_processing.html', ctx=ctx)
×
425

426
        elif not hepdata_submission.overall_status.startswith('sandbox'):
1✔
427
            ctx = format_submission(recid, record, version, version_count, hepdata_submission, observer_view=key_verified)
1✔
428
            ctx['record_type'] = 'publication'
1✔
429
            ctx['related_recids'] = get_record_data_list(hepdata_submission, "related")
1✔
430
            ctx['related_to_this_recids'] = get_record_data_list(hepdata_submission, "related_to_this")
1✔
431
            ctx['overall_status'] = hepdata_submission.overall_status
1✔
432

433

434
            if key_verified and observer_key:
1✔
435
                ctx['observer_key'] = observer_key
1✔
436
            elif hepdata_submission.overall_status == 'todo':
1✔
437
                observer = get_or_create_submission_observer(hepdata_submission.publication_recid)
1✔
438

439
                if key_verified and has_coordinator_permissions(recid, current_user):
1✔
NEW
UNCOV
440
                    ctx['observer_key'] = observer.observer_key
×
441

442
            increment(recid)
1✔
443

444
            if output_format == 'html' or output_format == 'json_ld':
1✔
445
                ctx['json_ld'] = get_json_ld(
1✔
446
                    ctx,
447
                    hepdata_submission.overall_status
448
                )
449

450
                if output_format == 'json_ld':
1✔
451
                    status_code = 404 if 'error' in ctx['json_ld'] else 200
1✔
452
                    return jsonify(ctx['json_ld']), status_code
1✔
453

454
                if output_format == 'html':
1✔
455
                    return render_template('hepdata_records/publication_record.html', ctx=ctx)
1✔
456

457
            elif 'table' not in request.args:
1✔
458
                if output_format == 'json':
1✔
459
                    ctx = process_ctx(ctx, light_mode)
1✔
460
                    return jsonify(ctx)
1✔
461
                else:
462
                    return redirect(
×
463
                        url_for('converter.download_submission_with_recid',
464
                                recid=recid, version=version, file_format=output_format,
465
                                rivet=request.args.get('rivet', None),
466
                                qualifiers=request.args.get('qualifiers', None)))
467
            else:
UNCOV
468
                table_name = request.args['table'].replace('%', '%25').replace('\\', '%5C')
×
469
                if hepdata_submission.inspire_id:
×
UNCOV
470
                    return redirect(
×
471
                        url_for('converter.download_data_table_by_inspire_id',
472
                                inspire_id='ins{}'.format(hepdata_submission.inspire_id),
473
                                table_name=table_name,
474
                                version=version, file_format=output_format,
475
                                rivet=request.args.get('rivet', None),
476
                                qualifiers=request.args.get('qualifiers', None)))
477
                else:
UNCOV
478
                    return redirect(
×
479
                        url_for('converter.download_data_table_by_recid', recid=recid,
480
                                table_name=table_name,
481
                                version=version, file_format=output_format,
482
                                rivet=request.args.get('rivet', None),
483
                                qualifiers=request.args.get('qualifiers', None)))
484
        else:
UNCOV
485
            abort(404)
×
486

487
    elif record is not None:  # this happens when we access an id of a data record
1✔
488
        # in which case, we find the related publication, and
489
        # make the front end focus on the relevant data table.
490
        try:
1✔
491
            publication_recid = int(record['related_publication'])
1✔
492
            publication_record = get_record_contents(publication_recid)
1✔
493

494
            datasubmission = DataSubmission.query.filter_by(associated_recid=recid).one()
1✔
495
            hepdata_submission = get_latest_hepsubmission(publication_recid=publication_recid,
1✔
496
                                                          version=datasubmission.version)
497

498
            ctx = format_submission(publication_recid, publication_record,
1✔
499
                                    datasubmission.version, 1, hepdata_submission,
500
                                    data_table=record['title'])
501
            ctx['record_type'] = 'table'
1✔
502
            ctx['related_publication_id'] = publication_recid
1✔
503
            ctx['table_name'] = record['title']
1✔
504
            ctx['related_recids'] = get_record_data_list(hepdata_submission, "related")
1✔
505
            ctx['related_to_this_recids'] = get_record_data_list(hepdata_submission, "related_to_this")
1✔
506

507
            if output_format == 'html' or output_format == 'json_ld':
1✔
508
                ctx['json_ld'] = get_json_ld(
1✔
509
                    ctx,
510
                    hepdata_submission.overall_status,
511
                    datasubmission
512
                )
513

514
                if output_format == 'json_ld':
1✔
515
                    status_code = 404 if 'error' in ctx['json_ld'] else 200
1✔
516
                    return jsonify(ctx['json_ld']), status_code
1✔
517

UNCOV
518
                return render_template('hepdata_records/related_record.html', ctx=ctx)
×
519

520
            else:
UNCOV
521
                return redirect(
×
522
                    url_for('converter.download_data_table_by_recid',
523
                            recid=publication_recid,
524
                            table_name=ctx['table_name'].replace('%', '%25').replace('\\', '%5C'),
525
                            version=datasubmission.version, file_format=output_format,
526
                            rivet=request.args.get('rivet', None),
527
                            qualifiers=request.args.get('qualifiers', None)))
528

UNCOV
529
        except Exception as e:
×
UNCOV
530
            abort(404)
×
531
    else:
532
        abort(404)
1✔
533

534

535
def has_upload_permissions(recid, user, is_sandbox=False):
1✔
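    """Return True if the user is an admin, the coordinator of the sandbox record, or a primary uploader for the record."""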
536
    if has_role(user, 'admin'):
1✔
537
        return True
1✔
538

539
    if is_sandbox:
1✔
UNCOV
540
        hepsubmission_record = get_latest_hepsubmission(publication_recid=recid, overall_status='sandbox')
×
UNCOV
541
        return hepsubmission_record is not None and hepsubmission_record.coordinator == user.id
×
542

543
    participant = SubmissionParticipant.query.filter_by(user_account=user.id,
1✔
544
        role='uploader', publication_recid=recid, status='primary').first()
545
    if participant:
1✔
546
        return True
1✔
547

548
def has_coordinator_permissions(recid, user, is_sandbox=False):
1✔
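    """Return True if the user is an admin or the coordinator of a submission for this record."""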
549
    if has_role(user, 'admin'):
1✔
550
        return True
1✔
551

552
    coordinator_record = HEPSubmission.query.filter_by(
1✔
553
        publication_recid=recid,
554
        coordinator=user.get_id()).first()
555
    return coordinator_record is not None
1✔
556

557

558
def create_new_version(recid, user, notify_uploader=True, uploader_message=None):
1✔
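    """Reopen a finished submission as a new version, optionally notifying the primary uploaders, and return a JSON response with the outcome."""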
559
    hepsubmission = get_latest_hepsubmission(publication_recid=recid)
1✔
560

561
    if hepsubmission.overall_status == 'finished':
1✔
562
        # Reopen the submission to allow for revisions,
563
        # by creating a new HEPSubmission object.
564
        _rev_hepsubmission = HEPSubmission(publication_recid=recid,
1✔
565
                                           overall_status='todo',
566
                                           inspire_id=hepsubmission.inspire_id,
567
                                           coordinator=hepsubmission.coordinator,
568
                                           version=hepsubmission.version + 1)
569

570
        # Get an existing or newly created SubmissionObserver object
571
        observer_key = get_or_create_submission_observer(_rev_hepsubmission.publication_recid, regenerate=True)
1✔
572

573
        db.session.add(_rev_hepsubmission)
1✔
574
        db.session.add(observer_key)
1✔
575
        db.session.commit()
1✔
576

577
        record_information = get_record_by_id(recid)
1✔
578

579
        if notify_uploader:
1✔
580
            uploaders = SubmissionParticipant.query.filter_by(
1✔
581
                role='uploader', publication_recid=recid, status='primary'
582
                )
583
            for uploader in uploaders:
1✔
584
                send_cookie_email(uploader,
1✔
585
                                  record_information,
586
                                  message=uploader_message,
587
                                  version=_rev_hepsubmission.version)
588

589
        if user:
1✔
590
            # Send the submission-created email, containing no uploader or reviewer information.
591
            notify_submission_created(record_information, user.id, None, None, revision=True)
1✔
592

593
        return jsonify({'success': True, 'version': _rev_hepsubmission.version})
1✔
594
    else:
595
        return jsonify({"message": f"Rec id {recid} is not finished so cannot create a new version"}), 400
1✔
596

597

598
def process_payload(recid, file, redirect_url, synchronous=False):
1✔
599
    """Process an uploaded file
600

601
    :param recid: int
602
        The id of the record to update
603
    :param file: file
604
        The file to process
605
    :param redirect_url: string
606
        Redirect URL to record, for use if the upload fails or in synchronous mode
607
    :param synchronous: bool
608
        Whether to process the file immediately (True, only recommended for tests) or asynchronously via celery (False, the default)
609
    :return: JSONResponse either containing 'url' (for success cases) or
610
             'message' (for error cases, which will give a 400 error).
611
    """
612

613
    if file and (allowed_file(file.filename)):
1✔
614
        file_path = save_zip_file(file, recid)
1✔
615
        file_size = os.path.getsize(file_path)
1✔
616
        UPLOAD_MAX_SIZE = current_app.config.get('UPLOAD_MAX_SIZE', 52000000)
1✔
617
        if file_size > UPLOAD_MAX_SIZE:
1✔
618
            return jsonify({"message":
1✔
619
                "{} too large ({} bytes > {} bytes)".format(
620
                    file.filename, file_size, UPLOAD_MAX_SIZE)}), 413
621

622
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
1✔
623

624
        if hepsubmission.overall_status == 'finished':
1✔
625
            # If it is finished and we receive an update,
626
            # then we need to reopen the submission to allow for revisions,
627
            # by creating a new HEPSubmission object.
628
            _rev_hepsubmission = HEPSubmission(publication_recid=recid,
1✔
629
                                               overall_status='todo',
630
                                               inspire_id=hepsubmission.inspire_id,
631
                                               coordinator=hepsubmission.coordinator,
632
                                               version=hepsubmission.version + 1)
633
            db.session.add(_rev_hepsubmission)
1✔
634
            hepsubmission = _rev_hepsubmission
1✔
635

636
        previous_status = hepsubmission.overall_status
1✔
637
        hepsubmission.overall_status = 'sandbox_processing' if previous_status == 'sandbox' else 'processing'
1✔
638
        db.session.add(hepsubmission)
1✔
639
        db.session.commit()
1✔
640

641
        if synchronous:
1✔
642
            process_saved_file(file_path, recid, current_user.get_id(), redirect_url, previous_status)
1✔
643
        else:
644
            process_saved_file.delay(file_path, recid, current_user.get_id(), redirect_url, previous_status)
1✔
645
            flash('File saved. You will receive an email when the file has been processed.', 'info')
1✔
646

647
        return jsonify({'url': redirect_url.format(recid)})
1✔
648
    else:
649
        return jsonify({"message": "You must upload a .zip, .tar, .tar.gz or .tgz file" +
1✔
650
                        " (or a .oldhepdata or single .yaml or .yaml.gz file)."}), 400
651

652

653
@shared_task
1✔
654
def process_saved_file(file_path, recid, userid, redirect_url, previous_status):
1✔
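    """Celery task to process a previously saved upload, email the uploader with the outcome and restore the submission's previous status."""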
655
    try:
1✔
656
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
1✔
657
        if hepsubmission.overall_status != 'processing' and hepsubmission.overall_status != 'sandbox_processing':
1✔
UNCOV
658
            log.error('Record {} is not in a processing state.'.format(recid))
×
UNCOV
659
            return
×
660

661
        errors = process_zip_archive(file_path, recid)
1✔
662

663
        uploader = User.query.get(userid)
1✔
664
        site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
1✔
665

666
        submission_participant = SubmissionParticipant.query.filter_by(
1✔
667
            publication_recid=recid, user_account=userid, role='uploader').first()
668
        if submission_participant:
1✔
UNCOV
669
            full_name = submission_participant.full_name
×
670
        else:
671
            full_name = uploader.email
1✔
672

673
        if errors:
1✔
UNCOV
674
            cleanup_submission(recid, hepsubmission.version, [])  # delete all tables if errors
×
675
            message_body = render_template('hepdata_theme/email/upload_errors.html',
×
676
                                           name=full_name,
677
                                           article=recid,
678
                                           redirect_url=redirect_url.format(recid),
679
                                           errors=errors,
680
                                           site_url=site_url)
681

UNCOV
682
            create_send_email_task(uploader.email,
×
683
                                   '[HEPData] Submission {0} upload failed'.format(recid),
684
                                   message_body)
685
        else:
686
            update_action_for_submission_participant(recid, userid, 'uploader')
1✔
687
            message_body = render_template('hepdata_theme/email/upload_complete.html',
1✔
688
                                           name=full_name,
689
                                           article=recid,
690
                                           link=redirect_url.format(recid),
691
                                           site_url=site_url,
692
                                           overall_status=hepsubmission.overall_status)
693

694
            create_send_email_task(uploader.email,
1✔
695
                                   '[HEPData] Submission {0} upload succeeded'.format(recid),
696
                                   message_body)
697

698
        # Reset the status of the submission back to the previous value.
699
        hepsubmission.overall_status = previous_status
1✔
700
        db.session.add(hepsubmission)
1✔
701
        db.session.commit()
1✔
702

703
        # Delete any previous upload folders relating to non-final versions
704
        # of this hepsubmission
705
        cleanup_old_files(hepsubmission)
1✔
706

707
    except Exception as e:
1✔
708
        # Reset the status and send error emails, unless we're working
709
        # asynchronously and celery is about to retry
710
        if not process_saved_file.request.id \
1✔
711
                or process_saved_file.request.retries >= process_saved_file.max_retries:
712
            try:
1✔
713
                cleanup_submission(recid, hepsubmission.version, [])
1✔
714
                errors = {
1✔
715
                    "Unexpected error": [{
716
                        "level": "error",
717
                        "message": "An unexpected error occurred: {}".format(e)
718
                    }]
719
                }
720
                uploader = User.query.get(userid)
1✔
721
                site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
1✔
722
                message_body = render_template('hepdata_theme/email/upload_errors.html',
1✔
723
                                               name=uploader.email,
724
                                               article=recid,
725
                                               redirect_url=redirect_url.format(recid),
726
                                               errors=errors,
727
                                               site_url=site_url)
728

729
                create_send_email_task(uploader.email,
1✔
730
                                       '[HEPData] Submission {0} upload failed'.format(recid),
731
                                       message_body)
732
                log.error("Final attempt of process_saved_file for recid %s failed. Resetting to previous status." % recid)
1✔
733

734
                # Reset the status of the submission back to the previous value.
735
                hepsubmission.overall_status = previous_status
1✔
736
                db.session.add(hepsubmission)
1✔
737
                db.session.commit()
1✔
738

739
            except Exception as ex:
1✔
740
                log.error("Exception while cleaning up: %s" % ex)
1✔
741

742
        else:
UNCOV
743
            log.debug("Celery will retry task, attempt %s" % process_saved_file.request.retries)
×
UNCOV
744
            raise e
×
745

746

747
def save_zip_file(file, id):
1✔
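    """Save the uploaded file under a timestamped data directory for the record and return the saved file path."""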
748
    filename = secure_filename(file.filename)
1✔
749
    time_stamp = str(int(round(time.time())))
1✔
750
    file_save_directory = get_data_path_for_record(str(id), time_stamp)
1✔
751

752
    if filename.endswith('.oldhepdata'):
1✔
753
        file_save_directory = os.path.join(file_save_directory, 'oldhepdata')
1✔
754

755
    if not os.path.exists(file_save_directory):
1✔
756
        os.makedirs(file_save_directory)
1✔
757
    file_path = os.path.join(file_save_directory, filename)
1✔
758

759
    print('Saving file to {}'.format(file_path))
1✔
760
    file.save(file_path)
1✔
761
    return file_path
1✔
762

763

764
def process_zip_archive(file_path, id, old_schema=False):
1✔
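    """Extract or split the uploaded archive, single YAML file or .oldhepdata file, then process the resulting submission directory, returning any errors found."""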
765
    (file_save_directory, filename) = os.path.split(file_path)
1✔
766

767
    if not filename.endswith('.oldhepdata'):
1✔
768
        file_save_directory = os.path.dirname(file_path)
1✔
769
        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
1✔
770
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
1✔
771

772
        if filename.endswith('.yaml.gz'):
1✔
773
            print('Extracting: {} to {}'.format(file_path, file_path[:-3]))
1✔
774
            if not extract(file_path, file_path[:-3]):
1✔
775
                message = clean_error_message_for_display(
1✔
776
                    "{} is not a valid .gz file.".format(file_path),
777
                    file_save_directory
778
                )
779
                return {
1✔
780
                    "Archive file extractor": [{
781
                        "level": "error",
782
                        "message": message
783
                    }]
784
                }
785
            return process_zip_archive(file_path[:-3], id,
1✔
786
                                       old_schema=False)
787
        elif filename.endswith('.yaml'):
1✔
788
            # we split the singular yaml file and create a submission directory
789
            error, last_updated = split_files(file_path, submission_temp_path)
1✔
790
            if error:
1✔
791
                message = clean_error_message_for_display(
1✔
792
                    str(error),
793
                    file_save_directory
794
                )
795
                return {
1✔
796
                    "Single YAML file splitter": [{
797
                        "level": "error",
798
                        "message": message
799
                    }]
800
                }
801
        else:
802
            # we are dealing with a zip, tar, etc. so we extract the contents
803
            try:
1✔
804
                unzipped_path = extract(file_path, submission_temp_path)
1✔
805
            except Exception as e:
1✔
806
                # Log the exception and raise it so that celery can retry
807
                log.exception(f"Unable to extract file {file_path}")
1✔
808
                message = clean_error_message_for_display(
1✔
809
                    "Unable to extract file {}. Please check the file is a valid zip or tar archive file and try again later. Contact info@hepdata.net if problems persist.".format(file_path),
810
                    file_save_directory
811
                )
812
                raise ValueError(message) from e
1✔
813

814
            if not unzipped_path:
1✔
815
                message = clean_error_message_for_display(
1✔
816
                    "{} is not a valid zip or tar archive file.".format(file_path),
817
                    file_save_directory
818
                )
819
                return {
1✔
820
                    "Archive file extractor": [{
821
                        "level": "error", "message": message
822
                    }]
823
                }
824

825
        copy_errors = move_files(submission_temp_path, submission_path)
1✔
826
        if copy_errors:
1✔
827
            return copy_errors
1✔
828

829
        submission_found = find_file_in_directory(submission_path, lambda x: x == "submission.yaml")
1✔
830

831
        if not submission_found:
1✔
UNCOV
832
            return {
×
833
                "Archive file extractor": [{
834
                    "level": "error", "message": "No submission.yaml file has been found in the archive."
835
                }]
836
            }
837

838
        basepath, submission_file_path = submission_found
1✔
839

840
    else:
841
        file_dir = os.path.dirname(file_save_directory)
1✔
842
        time_stamp = os.path.split(file_dir)[1]
1✔
843
        result = check_and_convert_from_oldhepdata(os.path.dirname(file_save_directory), id, time_stamp)
1✔
844

845
        # Check for errors
846
        if type(result) == dict:
1✔
UNCOV
847
            return result
×
848
        else:
849
            basepath, submission_file_path = result
1✔
850

851
    return process_submission_directory(basepath, submission_file_path, id,
1✔
852
                                        old_schema=old_schema)
853

854

855
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
1✔
856
    """
857
    Check if the input directory contains a .oldhepdata file
858
    and convert it to YAML if one is found.
859
    """
860
    converted_path = get_data_path_for_record(str(id), timestamp, 'yaml')
1✔
861

862
    oldhepdata_found = find_file_in_directory(
1✔
863
        input_directory,
864
        lambda x: x.endswith('.oldhepdata'),
865
    )
866
    if not oldhepdata_found:
1✔
UNCOV
867
        return {
×
868
            "Converter": [{
869
                "level": "error",
870
                "message": "No file with .oldhepdata extension has been found."
871
            }]
872
        }
873

874
    converted_temp_dir = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
1✔
875
    converted_temp_path = os.path.join(converted_temp_dir, 'yaml')
1✔
876

877
    try:
1✔
878
        successful = convert_oldhepdata_to_yaml(oldhepdata_found[1], converted_temp_path)
1✔
879
        if not successful:
1✔
880
            # Parse error message from title of HTML file, removing part of string after final "//".
UNCOV
881
            with open(converted_temp_path) as converted_temp_file:
×
UNCOV
882
                soup = BeautifulSoup(converted_temp_file, "html.parser")
×
883
            errormsg = soup.title.string.rsplit("//", 1)[0]
×
884

885
    except Error as error:  # hepdata_converter_ws_client.Error
×
UNCOV
886
        successful = False
×
UNCOV
887
        errormsg = str(error)
×
888

889
    if not successful:
1✔
UNCOV
890
        shutil.rmtree(converted_temp_dir, ignore_errors=True)  # can uncomment when this is definitely working
×
891

UNCOV
892
        return {
×
893
            "Converter": [{
894
                "level": "error",
895
                "message": "The conversion from oldhepdata "
896
                           "to the YAML format has not succeeded. "
897
                           "Error message from converter follows:<br/><br/>" + errormsg
898
            }]
899
        }
900
    else:
901
        copy_errors = move_files(converted_temp_path, converted_path)
1✔
902
        if copy_errors:
1✔
UNCOV
903
            return copy_errors
×
904

905
    return find_file_in_directory(converted_path, lambda x: x == "submission.yaml")
1✔
906

907

908
def move_files(submission_temp_path, submission_path):
1✔
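    """Replace the submission directory with the contents of the temporary directory, returning a dictionary of errors if the copy fails."""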
909
    print('Copying files from {} to {}'.format(submission_temp_path + '/.', submission_path))
1✔
910
    try:
1✔
911
        shutil.rmtree(submission_path, ignore_errors=True)
1✔
912
        shutil.copytree(submission_temp_path, submission_path, symlinks=False)
1✔
913
    except shutil.Error as e:
1✔
914
        errors = []
1✔
915
        for srcname, dstname, exception in e.args[0]:
1✔
916
            # Remove full paths from filenames before sending error message to user
917
            filename = srcname.replace(submission_temp_path + '/', '')
1✔
918
            msg = str(exception).replace(submission_temp_path + '/', '').replace(submission_path + '/', '')
1✔
919
            errors.append({
1✔
920
                "level": "error",
921
                "message": 'Invalid file {}: {}'.format(filename, msg)
922
            })
923

924
        return {
1✔
925
            "Exceptions when copying files": errors
926
        }
927
    except Exception as e:
1✔
928
        # Remove full paths from filenames before sending error message to user
929
        msg = str(e).replace(submission_temp_path + '/', '').replace(submission_path + '/', '')
1✔
930
        return {
1✔
931
            "Exceptions when copying files": [{
932
                "level": "error",
933
                "message": msg
934
            }]
935
        }
936

937
    finally:
938
        shutil.rmtree(submission_temp_path, ignore_errors=True)
1✔
939

940

941
def query_messages_for_data_review(data_review_record, messages):
1✔
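    """Append the data review's messages (newest first) to the given messages list, with each author's email and post time."""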
942
    if data_review_record.messages:
1✔
943
        data_messages = data_review_record.messages
1✔
944
        data_messages.sort(key=lambda data_message: data_message.id, reverse=True)
1✔
945
        for data_message in data_messages:
1✔
946
            current_user_obj = get_user_from_id(data_message.user)
1✔
947
            messages.append(
1✔
948
                {"message": data_message.message,
949
                 "user": current_user_obj.email,
950
                 "post_time": data_message.creation_date})
951

952
    return messages
1✔
953

954

955
def assign_or_create_review_status(data_table_metadata, publication_recid,
1✔
956
                                   version):
957
    """
958
    If a review already exists, it will be attached to the current data record.
959
    If a review does not exist for a data table, it will be created.
960

961
    :param data_table_metadata: the metadata describing the main table.
962
    :param publication_recid: publication record id
963
    :param version:
964
    """
965
    data_review_query = DataReview.query.filter_by(
1✔
966
        publication_recid=publication_recid, version=version)
967
    # This method also creates DataReviews for any data tables that
968
    # do not yet have one, rather than only creating a data review
969
    # when the review is clicked explicitly.
970
    assigned_tables = []
1✔
971
    if data_review_query.count() > 0:
1✔
972
        data_review_records = data_review_query.all()
1✔
973

974
        for data_review in data_review_records:
1✔
975
            if data_review.data_recid in data_table_metadata:
1✔
976
                data_table_metadata[data_review.data_recid][
1✔
977
                    "review_flag"] = data_review.status
978
                data_table_metadata[data_review.data_recid]["review_status"] = \
1✔
979
                    RECORD_PLAIN_TEXT[data_review.status]
980
                data_table_metadata[data_review.data_recid]["messages"] = len(
1✔
981
                    data_review.messages) > 0
982
                assigned_tables.append(data_review.data_recid)
1✔
983

984
    # now create the missing data reviews
985
    for data_table_id in data_table_metadata:
1✔
986
        if data_table_id not in assigned_tables:
1✔
987
            data_record = create_data_review(
1✔
988
                data_table_id, publication_recid, version=version)
989
            data_table_metadata[data_table_id][
1✔
990
                "review_flag"] = data_record.status
991
            data_table_metadata[data_table_id]["review_status"] = \
1✔
992
                RECORD_PLAIN_TEXT[data_record.status]
993

994

995
def determine_user_privileges(recid, ctx):
1✔
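    """Populate ctx with flags for the current user's privileges on this record (review/upload widgets, coordinator and admin status)."""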
996
    # show_review_area = not show_upload_area
997
    ctx['show_review_widget'] = False
1✔
998
    ctx['show_upload_widget'] = False
1✔
999
    ctx['is_submission_coordinator_or_admin'] = False
1✔
1000
    ctx['is_admin'] = False
1✔
1001

1002
    if current_user.is_authenticated:
1✔
1003
        user_id = current_user.get_id()
1✔
1004
        participant_records = get_submission_participants_for_record(recid, user_account=user_id)
1✔
1005

1006
        for participant_record in participant_records:
1✔
UNCOV
1007
            if participant_record is not None:
×
UNCOV
1008
                if participant_record.role == 'reviewer' and participant_record.status == 'primary':
×
UNCOV
1009
                    ctx['show_review_widget'] = True
×
1010

UNCOV
1011
                if participant_record.role == 'uploader' and participant_record.status == 'primary':
×
1012
                    ctx['show_upload_widget'] = True
×
1013

1014
        user = User.query.get(current_user.get_id())
1✔
1015
        if has_role(user, 'admin'):
1✔
1016
            ctx['is_submission_coordinator_or_admin'] = True
1✔
1017
            ctx['is_admin'] = True
1✔
1018
        else:
UNCOV
1019
            matching_records = HEPSubmission.query.filter_by(
×
1020
                publication_recid=recid,
1021
                coordinator=current_user.get_id()).count()
1022

UNCOV
1023
            if matching_records > 0:
×
UNCOV
1024
                ctx['is_submission_coordinator_or_admin'] = True
×
1025

1026
        ctx['show_upload_widget'] = (
1✔
1027
            ctx['show_upload_widget'] or ctx[
1028
                'is_submission_coordinator_or_admin'])
1029

1030

1031
def process_data_tables(ctx, data_record_query, first_data_id,
1✔
1032
                        data_table=None):
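    """Build an ordered dictionary of metadata for each data table in the query and determine which table should be shown first."""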
1033
    data_table_metadata = OrderedDict()
1✔
1034
    ctx['show_upload_area'] = False
1✔
1035

1036
    if ctx['show_upload_widget'] and data_record_query.count() == 0:
1✔
1037
        ctx['show_upload_area'] = True
1✔
1038
    elif data_record_query.count() > 0:
1✔
1039
        record_submissions = data_record_query.all()
1✔
1040
        for submission_record in record_submissions:
1✔
1041
            processed_name = "".join(submission_record.name.split())
1✔
1042
            data_table_metadata[submission_record.id] = {
1✔
1043
                "id": submission_record.id, "processed_name": processed_name,
1044
                "name": submission_record.name,
1045
                "location": submission_record.location_in_publication,
1046
                # Generate resource metadata
1047
                "resources": get_resource_data(submission_record),
1048
                "doi": submission_record.doi,
1049
                "description": sanitize_html(
1050
                    truncate_string(submission_record.description, 20),
1051
                    tags={},
1052
                    strip=True
1053
                )
1054
            }
1055

1056
            if first_data_id == -1:
1✔
1057
                first_data_id = submission_record.id
1✔
1058

1059
            if data_table:
1✔
1060
                if submission_record.name == data_table:
1✔
1061
                    first_data_id = submission_record.id
1✔
1062

1063
    return data_table_metadata, first_data_id
1✔
1064

1065

1066
def truncate_author_list(record, length=10):
1✔
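    """Truncate the record's author list to the first 'length' entries."""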
1067
    record['authors'] = record['authors'][:length]
1✔
1068

1069

1070
def get_all_ids(index=None, id_field='recid', last_updated=None, latest_first=False):
1✔
1071
    """Get all record or inspire ids of publications in the search index
1072

1073
    :param index: name of index to use.
1074
    :param id_field: id type to return. Should be 'recid' or 'inspire_id'
    :param last_updated: if given, only include records updated at or after this datetime.
    :param latest_first: if True, return ids ordered by last_updated, most recent first.
1075
    :return: list of integer ids
1076
    """
1077
    if id_field not in ('recid', 'inspire_id'):
1✔
1078
        raise ValueError('Invalid ID field %s' % id_field)
1✔
1079

1080
    db_col = HEPSubmission.publication_recid if id_field == 'recid' \
1✔
1081
        else HEPSubmission.inspire_id
1082

1083
    # Get unique version
1084
    query = db.session.query(db_col) \
1✔
1085
        .filter(HEPSubmission.overall_status == 'finished')
1086

1087
    if last_updated:
1✔
1088
        query = query.filter(HEPSubmission.last_updated >= last_updated)
1✔
1089

1090
    if latest_first:
1✔
1091
        # Use a set to check for duplicates, as sorting by last_updated
1092
        # means distinct doesn't work (as it looks for distinct across both
1093
        # cols)
1094
        query = query.order_by(HEPSubmission.last_updated.desc())
1✔
1095
        seen = set()
1✔
1096
        seen_add = seen.add
1✔
1097
        return [
1✔
1098
            int(x[0]) for x in query.all() if not (x[0] in seen or seen_add(x[0]))
1099
        ]
1100
    else:
1101
        query = query.order_by(HEPSubmission.publication_recid).distinct()
1✔
1102
        return [int(x[0]) for x in query.all()]
1✔
1103

1104

1105
def get_related_hepsubmissions(submission):
1✔
1106
    """
1107
    Queries the database for all HEPSubmission objects contained in
1108
    this object's related record ID list.
1109
    (All submissions this one is relating to)
1110

1111
    :return: [list] A list of HEPSubmission objects
1112
    """
1113
    related_submissions = []
1✔
1114
    for related in submission.related_recids:
1✔
1115
        data_submission = get_latest_hepsubmission(
1✔
1116
            publication_recid=related.related_recid
1117
        )
1118
        if data_submission:
1✔
1119
            related_submissions.append(data_submission)
1✔
1120
    return related_submissions
1✔
1121

1122

1123
def get_related_to_this_hepsubmissions(submission):
1✔
1124
    """
1125
    Queries the database for all records in the RelatedRecId table
1126
    that have THIS record's id as a related record.
1127
    Then returns the HEPSubmission object marked in the RelatedRecid table.
1128
    Returns only submissions marked as 'finished'
1129

1130
    :return: [list] List containing related records.
1131
    """
1132

1133
    # We use a subquery to get the max version/recid pairing
1134
    subquery = (
1✔
1135
        HEPSubmission.query
1136
        .with_entities(
1137
            HEPSubmission.publication_recid,
1138
            func.max(HEPSubmission.version).label('max_version')
1139
        )
1140
        .group_by(HEPSubmission.publication_recid)
1141
        .subquery()
1142
    )
1143

1144
    # Use result of subquery to join and select the max submission where related
1145
    related_submissions = (
1✔
1146
        HEPSubmission.query
1147
        .join(subquery, (HEPSubmission.publication_recid == subquery.c.publication_recid) & (
1148
                HEPSubmission.version == subquery.c.max_version))
1149
        .join(RelatedRecid, RelatedRecid.this_recid == HEPSubmission.publication_recid)
1150
        .filter(RelatedRecid.related_recid == submission.publication_recid)
1151
        .all()
1152
    )
1153

1154
    # Set comprehension to determine unique IDs where the max version object is 'finished'
1155
    unique_recids = {sub.publication_recid for sub in related_submissions if sub.overall_status == 'finished'}
1✔
1156

1157
    return [get_latest_hepsubmission(publication_recid=recid, overall_status='finished') for recid in unique_recids]
1✔
1158

1159

1160
def get_related_datasubmissions(data_submission):
1✔
1161
    """
1162
    Queries the database for all DataSubmission objects contained in
1163
    this object's related DOI list.
1164
    (All submissions this one is relating to)
1165

1166
    :param data_submission: The datasubmission object to find related data for.
1167
    :return: [list] A list of DataSubmission objects
1168
    """
1169
    related_submissions = []
1✔
1170
    for related in data_submission.related_tables:
1✔
1171
        submission = (
1✔
1172
            DataSubmission.query
1173
            .filter(DataSubmission.doi == related.related_doi)
1174
            .join(HEPSubmission, HEPSubmission.publication_recid == DataSubmission.publication_recid)
1175
            .first()
1176
        )
1177
        if submission:
1✔
1178
            related_submissions.append(submission)
1✔
1179
    return related_submissions
1✔
1180

1181

1182
def get_related_to_this_datasubmissions(data_submission):
1✔
1183
    """
1184
        Get the DataSubmission Objects with a RelatedTable entry
1185
        where this doi is referred to in related_doi.
1186
        Only returns where associated HEPSubmission object is `finished`,
1187
        OR where it is within the same HEPSubmission
1188

1189
        :param data_submission: The datasubmission to find the related entries for.
1190
        :return: [List] List of DataSubmission objects.
1191
    """
1192
    related_submissions = (
1✔
1193
        DataSubmission.query
1194
        .join(RelatedTable, RelatedTable.table_doi == DataSubmission.doi)
1195
        .join(HEPSubmission, (HEPSubmission.publication_recid == DataSubmission.publication_recid))
1196
        .group_by(DataSubmission.id)
1197
        .having(func.max(HEPSubmission.version) == DataSubmission.version)
1198
        .filter(RelatedTable.related_doi == data_submission.doi)
1199
        # If finished, OR is part of the same submission
1200
        .filter(
1201
            (HEPSubmission.overall_status == 'finished') | (
1202
                HEPSubmission.publication_recid == data_submission.publication_recid))
1203
        .all()
1204
    )
1205
    return related_submissions
1✔
1206

1207

1208
def get_record_data_list(record, data_type):
1✔
1209
    """
1210
    Generates a list of dictionaries (title/recid/version) from a list of record IDs.
1211
    This must be done as the record contents are not stored within the hepsubmission object.
1212

1213
    :param record: The record used for the query.
1214
    :param data_type: Either the related, or related to this data.
1215
    :return: [list] A list of dictionary objects containing record ID and title pairs
1216
    """
1217
    # Selects the related data based on the data_type flag
1218
    data = []
1✔
1219
    if data_type == "related":
1✔
1220
        data = get_related_hepsubmissions(record)
1✔
1221
    elif data_type == "related_to_this":
1✔
1222
        data = get_related_to_this_hepsubmissions(record)
1✔
1223

1224
    record_data = []
1✔
1225
    for datum in data:
1✔
1226
        record_data.append(
1✔
1227
        {
1228
            "recid": datum.publication_recid,
1229
            "title": get_record_contents(datum.publication_recid)["title"],
1230
            "version": datum.version
1231
        })
1232
    return record_data
1✔
1233

1234

1235
def get_table_data_list(table, data_type):
1✔
1236
    """
1237
    Generates a list of general information (name, doi, desc) dictionaries of related DataSubmission objects.
1238
    Will either use the related data list (get_related_data_submissions)
1239
    OR the `related to this` list (generated by get_related_to_this_datasubmissions)
1240

1241
    :param table: The DataSubmission object used for querying.
1242
    :param data_type: The flag to decide which relation data to use.
1243
    :return: [list] A list of dictionaries with the name, doi and description of the object.
1244
    """
1245
    # Selects the related data based on the data_type flag
1246
    if data_type == "related":
1✔
1247
        data = get_related_datasubmissions(table)
1✔
1248
    elif data_type == "related_to_this":
1✔
1249
        data = get_related_to_this_datasubmissions(table)
1✔
1250

1251
    record_data = []
1✔
1252
    if data:
1✔
1253
        for datum in data:
1✔
1254
            record_data.append({
1✔
1255
                "name": datum.name,
1256
                "doi": datum.doi,
1257
                "description": datum.description
1258
            })
1259
    return record_data
1✔