• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HEPData / hepdata / 6877611252

15 Nov 2023 12:58PM UTC coverage: 83.128% (+0.3%) from 82.825%
6877611252

push

github

ItIsJordan
Related function refactoring

Moves functions for related data from the models.py file and common.py into /records/api.py. Updates test_records.py to use new functions

49 of 49 new or added lines in 3 files covered. (100.0%)

87 existing lines in 2 files now uncovered.

4449 of 5352 relevant lines covered (83.13%)

0.83 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.13
/hepdata/modules/records/utils/data_processing_utils.py
1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of HEPData.
4
# Copyright (C) 2016 CERN.
5
#
6
# HEPData is free software; you can redistribute it
7
# and/or modify it under the terms of the GNU General Public License as
8
# published by the Free Software Foundation; either version 2 of the
9
# License, or (at your option) any later version.
10
#
11
# HEPData is distributed in the hope that it will be
12
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with HEPData; if not, write to the
18
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19
# MA 02111-1307, USA.
20
#
21
# In applying this license, CERN does not
22
# waive the privileges and immunities granted to it by virtue of its status
23
# as an Intergovernmental Organization or submit itself to any jurisdiction.
24

25
from flask import current_app
1✔
26
from collections import OrderedDict
1✔
27
import re
1✔
28

29
from hepdata.utils.miscellaneous import sanitize_html
1✔
30

31

32
def pad_independent_variables(table_contents):
    """
    Pads out the independent variable column in the event that nothing exists.

    A placeholder independent variable with an empty header is appended to
    ``table_contents["independent_variables"]``. It carries one empty value
    for every value of the first dependent variable.

    :param table_contents: table dictionary holding the "dependent_variables"
        and "independent_variables" lists; modified in place.
    :return: None
    """
    dependent_variables = table_contents["dependent_variables"]
    if not dependent_variables:
        # Without a dependent variable there is no row count to pad to, so
        # leave the table untouched rather than fail on the [0] lookup below.
        return

    row_count = len(dependent_variables[0]['values'])
    table_contents["independent_variables"].append({
        "header": {"name": "", "units": ""},
        # One fresh dict per row so that rows never share state.
        "values": [{"value": ""} for _ in range(row_count)],
    })
47

48

49
def fix_nan_inf(value):
    """
    Converts NaN, +inf, and -inf values to strings.

    Each of the 'value', 'high' and 'low' entries whose string form is listed
    in the ``SPECIAL_VALUES`` application setting is replaced by that string
    form. Other entries are left as they are.

    :param value: dictionary describing a data point; modified in place.
    :return: the same dictionary that was passed in.
    """
    for key in ('value', 'high', 'low'):
        if key in value and str(value[key]) in current_app.config['SPECIAL_VALUES']:
            # Stringify the entry being inspected. Copying value['value']
            # here would overwrite a special 'high'/'low' bound with the
            # central value and raise KeyError when 'value' is absent.
            value[key] = str(value[key])
    return value
61

62

63
def process_independent_variables(table_contents, x_axes, independent_variable_headers):
    """
    Collects the independent variables of a table into one column per header.

    When the table has dependent variables but no independent ones, a
    placeholder independent variable is added first.

    :param table_contents: table dictionary holding the "independent_variables"
        and "dependent_variables" lists; may be padded in place.
    :param x_axes: mapping filled in place, from header text to the list of
        values of that independent variable.
    :param independent_variable_headers: list extended in place with one
        {"name": ..., "colspan": 1} entry per independent variable.
    :return: None
    """
    nothing_independent = len(table_contents["independent_variables"]) == 0
    if nothing_independent and table_contents["dependent_variables"]:
        pad_independent_variables(table_contents)

    if not table_contents["independent_variables"]:
        return

    for position, x_axis in enumerate(table_contents["independent_variables"]):
        header = x_axis['header']
        units = header.get('units', '')
        x_header = header['name']
        if units:
            x_header = x_header + ' [' + units + ']'

        if x_header in x_axes:
            # Two variables can share a header; the position of the later
            # one is appended so that it gets a column of its own.
            x_header = x_header + '__{0}'.format(position)

        column = []
        x_axes[x_header] = column
        independent_variable_headers.append({"name": x_header, "colspan": 1})

        if x_axis["values"]:
            for value in x_axis["values"]:
                column.append(fix_nan_inf(value))
93

94

95
def process_dependent_variables(group_count, record, table_contents,
                                tmp_values, independent_variables,
                                dependent_variable_headers):
    """
    Folds the dependent variables of a table into the renderable record.

    For every dependent variable this stores its qualifiers (merging
    neighbouring cells of equal type and value into one cell with a larger
    colspan), appends its header, and files each of its values under the
    matching row of ``tmp_values`` next to the independent values of that row.

    :param group_count: group number given to the first dependent variable;
        each following dependent variable gets the next number.
    :param record: dictionary with "qualifiers" and "qualifier_order"
        entries; modified in place.
    :param table_contents: table dictionary providing "dependent_variables".
    :param tmp_values: mapping from row index to {"x": [...], "y": [...]};
        filled in place.
    :param independent_variables: mapping from header to the list of values
        of that independent variable.
    :param dependent_variable_headers: list extended in place with one
        {"name": ..., "colspan": 1} entry per dependent variable.
    :return: None
    """
    for y_axis in table_contents["dependent_variables"]:

        if "qualifiers" in y_axis:
            # How many times each qualifier name has repeated within this
            # dependent variable (0 for the first occurrence).
            repeats = {}
            for qualifier in y_axis["qualifiers"]:
                base_name = qualifier["name"]
                if base_name in repeats:
                    repeats[base_name] += 1
                    column_key = "{0}-{1}".format(base_name, repeats[base_name])
                else:
                    repeats[base_name] = 0
                    column_key = base_name

                if column_key not in record["qualifiers"]:
                    record["qualifier_order"].append(column_key)
                    record["qualifiers"][column_key] = []

                cell_text = str(qualifier["value"])
                if 'units' in qualifier:
                    cell_text = cell_text + ' ' + qualifier['units']

                record["qualifiers"][column_key].append({
                    "type": base_name,
                    "value": cell_text,
                    "colspan": 1,
                    "group": group_count,
                })

            # attempt column merge
            for column_key in record["qualifiers"]:
                cells = record["qualifiers"][column_key]
                merged_cells = []
                open_cell = None
                for cell in cells:
                    if not open_cell:
                        open_cell = cell
                    elif (open_cell["type"] == cell["type"]
                          and open_cell["value"] == cell["value"]):
                        open_cell["colspan"] += 1
                    else:
                        merged_cells.append(open_cell)
                        open_cell = cell

                # the cell still open once the row is exhausted closes it
                if len(cells) > 0:
                    merged_cells.append(open_cell)

                record["qualifiers"][column_key] = merged_cells

        y_header_info = y_axis['header']
        units = y_header_info.get('units', '')
        y_header = y_header_info['name']
        if units:
            y_header = y_header + ' [' + units + ']'
        dependent_variable_headers.append({"name": y_header, "colspan": 1})

        for row_index, y_record in enumerate(y_axis["values"]):

            if row_index not in tmp_values:
                # Stop as soon as a y value has no x value on its row in
                # any of the independent variables.
                if any(row_index >= len(column)
                       for column in independent_variables.values()):
                    break

                tmp_values[row_index] = {
                    "x": [column[row_index]
                          for column in independent_variables.values()],
                    "y": [],
                }

            fix_nan_inf(y_record)
            y_record["group"] = group_count

            if "errors" in y_record:
                # process the labels to ensure uniqueness
                label_counts = {}
                for error in y_record["errors"]:
                    label = error.get("label", "error")
                    occurrences = label_counts.get(label, 0) + 1
                    label_counts[label] = occurrences

                    if occurrences > 1:
                        error["label"] = label + "_" + str(occurrences)

                    # append "_1" to first error label that has a duplicate
                    if occurrences == 2:
                        for earlier in y_record["errors"]:
                            if earlier.get("label", "error") == label:
                                earlier["label"] = label + "_1"
                                break
            else:
                y_record["errors"] = [{"symerror": 0, "hide": True}]

            tmp_values[row_index]["y"].append(y_record)

        group_count += 1
200

201

202
def generate_table_structure(table_contents):
    """
    Creates a renderable structure from the table structure we've defined.

    :param table_contents: table dictionary providing the table metadata
        ("name", "doi", "location", "title", "review", "keywords", ...)
        together with its independent and dependent variables.
    :return: a dictionary encompassing the qualifiers, headers and values
    """
    record = {
        "name": table_contents["name"],
        "doi": table_contents["doi"],
        "location": table_contents["location"],
        "related_tables": table_contents["related_tables"],
        "related_to_this": table_contents["related_to_this"],
        "qualifiers": {},
        "qualifier_order": [],
        "headers": [],
        "review": table_contents["review"],
        "associated_files": table_contents["associated_files"],
        "keywords": {},
        "values": [],
    }
    record["description"] = sanitize_html(table_contents["title"])

    # group the keyword values by keyword name, skipping repeated values
    if table_contents['keywords'] is not None:
        for keyword in table_contents['keywords']:
            known_values = record['keywords'].setdefault(keyword.name, [])
            if keyword.value not in known_values:
                known_values.append(keyword.value)

    rows = {}
    x_axes = OrderedDict()
    x_headers = []
    process_independent_variables(table_contents, x_axes, x_headers)
    record["x_count"] = len(x_headers)
    record["headers"].extend(x_headers)

    y_headers = []
    process_dependent_variables(0, record, table_contents,
                                rows, x_axes, y_headers)

    # attempt column merge: consecutive y headers with the same name
    # collapse into a single header spanning several columns
    open_header = None
    for y_header in y_headers:
        if not open_header:
            open_header = y_header
        elif open_header["name"] == y_header["name"]:
            open_header["colspan"] += 1
        else:
            record["headers"].append(open_header)
            open_header = y_header
    if len(y_headers) > 0:
        record["headers"].append(open_header)

    record["values"].extend(rows.values())

    return record
263

264

265
def str_presenter(dumper, data):
    """
    Represents a string as a YAML string scalar.

    Text containing a newline is emitted with the '|' style; any other text
    is emitted without an explicit style.

    :param dumper: object providing ``represent_scalar`` (presumably a YAML
        dumper -- confirm against where this presenter is registered).
    :param data: the string to represent.
    :return: the result of ``dumper.represent_scalar``.
    """
    style_options = {'style': '|'} if "\n" in data else {}
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, **style_options)
269

270

271
def process_ctx(ctx, light_mode=False):
    """
    Strips a record context of its UI and permission entries and, unless in
    light mode, adds download links to every data table.

    :param ctx: context dictionary; modified in place.
    :param light_mode: when True the 'data_tables' entry is removed
        altogether instead of being given download links.
    :return: the same ``ctx`` dictionary that was passed in.
    """
    for key_to_remove in ('show_review_widget', 'show_upload_area', 'show_upload_widget',
                          'coordinators', 'is_submission_coordinator_or_admin', 'is_admin'):
        ctx.pop(key_to_remove, None)

    if light_mode:
        ctx.pop('data_tables', None)
        return ctx

    site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
    data_tables = ctx['data_tables']
    if not data_tables:
        # No table to link, so the record identifier is not needed either.
        return ctx

    # The record identifier is the same for every table: resolve it once
    # instead of once per removed key of every table.
    record = ctx['record']
    if ctx['status'] == 'finished' and record['inspire_id']:
        _recid = 'ins{}'.format(record['inspire_id'])
    elif 'recid' in record:
        _recid = record['recid']
    else:
        _recid = ctx['recid']

    for data_table in data_tables:
        for key_to_remove in ('review_status', 'review_flag'):
            data_table.pop(key_to_remove, None)

        # Only '%' and '\' are escaped in the table name; '%' goes first so
        # that the '%' of '%5C' is not escaped a second time.
        _cleaned_table_name = data_table['name'].replace('%', '%25').replace('\\', '%5C')

        data_table['data'] = {
            file_format: '{0}/download/table/{1}/{2}/{3}'.format(
                site_url, _recid, _cleaned_table_name, file_format)
            for file_format in ('json', 'root', 'csv', 'yoda', 'yoda1', 'yaml')
        }

    return ctx
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc