• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

inspirehep / inspire-next / 12567

pending completion
12567

Pull #3460

travis-ci

web-flow
disambiguation: add signature pairs sampling

Since INSPIRE has ~3M curated signatures it would take too much time
to train on all possible pairs, so we sample 1M pairs in such a way
that they are representative of the known clusters structure.

Signed-off-by: Jacopo Notarstefano <jacopo.notarstefano@gmail.com>
Pull Request #3460: disambiguation: add signature pairs sampling

60 of 60 new or added lines in 3 files covered. (100.0%)

7576 of 9664 relevant lines covered (78.39%)

2.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.74
/inspirehep/modules/literaturesuggest/tasks.py
1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of INSPIRE.
4
# Copyright (C) 2014-2017 CERN.
5
#
6
# INSPIRE is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# INSPIRE is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
18
#
19
# In applying this license, CERN does not waive the privileges and immunities
20
# granted to it by virtue of its status as an Intergovernmental Organization
21
# or submit itself to any jurisdiction.
22

23
from __future__ import absolute_import, division, print_function
5✔
24

25
import copy
5✔
26
import datetime
5✔
27

28
from idutils import is_arxiv_post_2007
5✔
29

30
from inspire_schemas.api import LiteratureBuilder
5✔
31
from inspire_utils.helpers import force_list
5✔
32
from inspire_utils.record import get_value
5✔
33
from inspirehep.modules.forms.utils import filter_empty_elements
5✔
34
from inspirehep.modules.workflows.utils import with_debug_logging
5✔
35
from inspirehep.utils.record import get_title
5✔
36

37

38
def formdata_to_model(obj, formdata):
    """Manipulate form data to match literature data model.

    Args:
        obj: workflow object of the submission. Its ``id`` and ``id_user``
            are stored in the acquisition source, and
            ``obj.extra_data['submission_pdf']`` is set when the form
            carries a ``url`` that does not point to arXiv.
        formdata (dict): data coming from the literature suggestion form.
            It is deep-copied before any manipulation, so the dict passed
            by the caller is left untouched.

    Returns:
        The ``record`` built by the ``LiteratureBuilder`` from the form.
    """
    def _is_arxiv_url(url):
        return 'arxiv.org' in url

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['authors', 'supervisors', 'report_numbers']
    )

    builder = LiteratureBuilder(source='submitter')

    def _add_contributors(contributors, role):
        """Add each contributor of the form to the builder with ``role``."""
        for contributor in contributors:
            full_name = contributor['full_name']
            # Always read the affiliation of the contributor being added:
            # the supervisors loop used to test the affiliation of the
            # last *author*, which dropped or kept the wrong affiliations
            # (and raised NameError when there were no authors at all).
            affiliation = contributor['affiliation']
            builder.add_author(builder.make_author(
                full_name,
                affiliations=force_list(affiliation) if affiliation else None,
                roles=[role]
            ))

    _add_contributors(form_fields.get('authors', []), 'author')
    _add_contributors(form_fields.get('supervisors', []), 'supervisor')

    builder.add_title(title=form_fields.get('title'))

    # A conference name takes precedence over the declared document type.
    document_type = 'conference paper' if form_fields.get('conf_name') \
        else form_fields.get('type_of_doc', [])
    if document_type == 'chapter':
        document_type = 'book chapter'

    builder.add_document_type(
        document_type=document_type
    )

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None
    )

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split()
        )

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'),
        source='user'
    )

    for key in ('extra_comments', 'nonpublic_note',
                'hidden_notes', 'conf_name'):
        builder.add_private_note(
            private_notes=form_fields.get(key)
        )

    # A missing or non-numeric year is passed on as ``None``.
    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created')
    )

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(
            defense_date=form_fields.get('defense_date'),
            degree_type=form_fields.get('degree_type'),
            institution=form_fields.get('institution'),
            date=form_fields.get('thesis_date')
        )

    if form_fields.get('type_of_doc') == 'chapter':
        if not form_fields.get('journal_title'):
            builder.add_book_series(title=form_fields.get('series_title'))

    if form_fields.get('type_of_doc') == 'book':
        if form_fields.get('journal_title'):
            # With a journal title the series volume is reused as the
            # journal volume of the publication info added below.
            form_fields['volume'] = form_fields.get('series_volume')
        else:
            builder.add_book_series(
                title=form_fields.get('series_title'),
                volume=form_fields.get('series_volume')
            )
        builder.add_book(
            publisher=form_fields.get('publisher_name'),
            place=form_fields.get('publication_place'),
            date=form_fields.get('publication_date'))

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('start_page'),
        page_end=form_fields.get('end_page'),
        artid=form_fields.get('artid'),
        parent_record=form_fields.get('parent_book')
    )

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment')
    )

    # 'oth' means the language was typed by hand in ``other_language``.
    language = form_fields.get('other_language') \
        if form_fields.get('language') == 'oth' \
        else form_fields.get('language')
    builder.add_language(language=language)

    if form_fields.get('title_translation'):
        builder.add_title_translation(
            title=form_fields['title_translation'],
            language='en',
        )

    builder.add_title(
        title=form_fields.get('title_arXiv'),
        source='arXiv'
    )

    builder.add_title(
        title=form_fields.get('title_crossref'),
        source='crossref'
    )

    builder.add_license(url=form_fields.get('license_url'))

    builder.add_public_note(public_note=form_fields.get('public_notes'))

    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef'
    )

    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')
    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        # The main URL is only recorded in the record itself when no
        # additional URL was provided.
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number')
        )

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter'
    )

    return builder.record
207

208

209
def new_ticket_context(user, obj):
    """Build the template context for a new literature suggestion ticket.

    The context carries the submitter's email, the record title, the
    external system identifiers (or an empty string when there are none),
    the submitter's comment and references taken from the stored form
    data, the workflow object itself and the ticket subject.
    """
    record_title = get_title(obj.data)
    formdata = obj.extra_data.get('formdata', {})
    external_ids = get_value(obj.data, "external_system_numbers.value")

    context = {
        'email': user.email,
        'title': record_title,
        # Falsy identifier lists collapse to an empty string.
        'identifier': external_ids or "",
        'user_comment': formdata.get('extra_comments', ''),
        'references': formdata.get('references'),
        'object': obj,
        'subject': u"Your suggestion to INSPIRE: {0}".format(record_title),
    }
    return context
224

225

226
def reply_ticket_context(user, obj):
    """Build the template context for a reply to a literature ticket.

    ``reason`` and ``record_url`` are read from ``obj.extra_data`` and
    default to an empty string when absent.
    """
    extra_data = obj.extra_data
    context = {
        'object': obj,
        'user': user,
        'title': get_title(obj.data),
        'reason': extra_data.get("reason", ""),
        'record_url': extra_data.get("url", ""),
    }
    return context
235

236

237
def curation_ticket_context(user, obj):
    """Build the template context for a curation ticket.

    The ticket subject is the space-separated list of the record's arXiv
    identifiers, DOIs and report numbers, followed by ``(#<recid>)``.
    ``email`` is an empty string when no user is given.
    """
    extra_data = obj.extra_data
    recid = extra_data.get('recid')
    record_url = extra_data.get('url')

    eprints = get_value(obj.data, 'arxiv_eprints.value') or []
    for position, eprint in enumerate(eprints):
        if not eprint or not is_arxiv_post_2007(eprint):
            continue
        # Only identifiers accepted by ``is_arxiv_post_2007`` are prefixed.
        eprints[position] = 'arXiv:{0}'.format(eprint)

    report_numbers = get_value(obj.data, 'report_numbers.value') or []
    prefixed_dois = []
    for doi in get_value(obj.data, 'dois.value') or []:
        prefixed_dois.append("doi:{0}".format(doi))

    formdata = extra_data.get('formdata', {})

    subject_parts = (
        eprints + prefixed_dois + report_numbers + ['(#{0})'.format(recid)]
    )
    subject = ' '.join(
        [part for part in subject_parts if part is not None]
    )

    if user:
        email = user.email
    else:
        email = ''

    return {
        'recid': recid,
        'record_url': record_url,
        'link_to_pdf': formdata.get('url'),
        'email': email,
        'references': formdata.get('references'),
        'user_comment': formdata.get('extra_comments', ''),
        'subject': subject,
    }
270

271

272
@with_debug_logging
def curation_ticket_needed(obj, eng):
    """Check if a curation ticket is needed.

    Returns the ``core`` entry of ``obj.extra_data``, or ``False`` when
    that entry is missing.
    """
    is_core = obj.extra_data.get("core", False)
    return is_core
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc