6877611252

Committed 15 Nov 2023 12:58PM UTC coverage: 83.128% (+0.3%) from 82.825%

Build # 6877611252

Build Type

push

github

Committed by

ItIsJordan

Commit Message

Related function refactoring

Moves functions for related data from the models.py file and common.py into /records/api.py. Updates test_records.py to use new functions

Run Details

49 of 49 new or added lines in 3 files covered. (100.0%)

87 existing lines in 2 files now uncovered.

4449 of 5352 relevant lines covered (83.13%)

0.83 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.13

/hepdata/modules/records/utils/data_processing_utils.py

# -*- coding: utf-8 -*-
#
# This file is part of HEPData.
# Copyright (C) 2016 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

from flask import current_app
from collections import OrderedDict
import re

from hepdata.utils.miscellaneous import sanitize_html


def pad_independent_variables(table_contents):
    """
    Appends a blank independent variable to a table.

    The new variable has an empty name and units, and holds one empty
    value for every value of the first dependent variable.  The table is
    modified in place.

    :param table_contents: dictionary with "dependent_variables" and
        "independent_variables" lists
    :return: None
    """
    first_dependent = table_contents["dependent_variables"][0]
    row_total = len(first_dependent['values'])

    # every row gets its own dictionary, never a shared one
    blank_axis = {
        "header": {"name": "", "units": ""},
        "values": [{"value": ""} for _ in range(row_total)],
    }
    table_contents["independent_variables"].append(blank_axis)


def fix_nan_inf(value):
    """
    Converts NaN, +inf, and -inf values to strings.

    The 'value', 'high' and 'low' entries are each checked on their own:
    when the string form of an entry is listed in the ``SPECIAL_VALUES``
    application config, that entry is replaced by its own string form.
    The dictionary is modified in place.

    :param value: dictionary that may hold 'value', 'high' and/or 'low' keys
    :return: the same dictionary
    """
    for key in ('value', 'high', 'low'):
        if key not in value:
            continue
        as_text = str(value[key])
        if as_text in current_app.config['SPECIAL_VALUES']:
            # convert the entry being inspected, not the 'value' entry:
            # a bin may have only 'high'/'low', or a 'value' that differs
            value[key] = as_text
    return value


def process_independent_variables(table_contents, x_axes, independent_variable_headers):
    """
    Collects the independent variables (x axes) of a table.

    If the table has dependent variables but no independent variable, a
    blank one is added first.  For each independent variable a header is
    built from its name and, when present, its units; the header is used
    as a key of ``x_axes`` and recorded in ``independent_variable_headers``.

    :param table_contents: dictionary with "independent_variables" and
        "dependent_variables" lists
    :param x_axes: dictionary that receives header -> list of values
    :param independent_variable_headers: list that receives one
        {"name": ..., "colspan": 1} entry per independent variable
    :return: None
    """
    has_no_x = len(table_contents["independent_variables"]) == 0
    if has_no_x and table_contents["dependent_variables"]:
        pad_independent_variables(table_contents)

    for position, x_axis in enumerate(table_contents["independent_variables"]):
        header = x_axis['header']
        units = header['units'] if 'units' in header else ''
        x_header = header['name']
        if units:
            x_header = x_header + ' [' + units + ']'

        # two axes may share a header; the position keeps the key unique
        if x_header in x_axes:
            x_header = x_header + '__' + str(position)

        column = []
        x_axes[x_header] = column
        independent_variable_headers.append({"name": x_header, "colspan": 1})

        for entry in x_axis["values"] or ():
            column.append(fix_nan_inf(entry))


def process_dependent_variables(group_count, record, table_contents,
                                tmp_values, independent_variables,
                                dependent_variable_headers):
    """
    Processes each dependent variable (y axis) of a table, filling in the
    qualifiers, column headers and row values.

    ``record``, ``tmp_values`` and ``dependent_variable_headers`` are
    modified in place, as are the value dictionaries of each y axis.

    :param group_count: group index given to the first dependent variable;
        it is incremented by one after each dependent variable.  Only the
        local name is rebound, so the caller's variable is not updated.
    :param record: dictionary whose "qualifiers" dictionary and
        "qualifier_order" list are updated
    :param table_contents: dictionary with a "dependent_variables" list
    :param tmp_values: dictionary mapping a row index to
        {"x": [...], "y": [...]}; rows are created on first use
    :param independent_variables: mapping of x header to its list of values
    :param dependent_variable_headers: list that receives one
        {"name": ..., "colspan": 1} entry per dependent variable
    :return: None
    """
    for y_axis in table_contents["dependent_variables"]:

        # number of repeats seen so far for each qualifier name of this y axis
        qualifiers = {}
        if "qualifiers" in y_axis:
            for qualifier in y_axis["qualifiers"]:
                # colspan = len(y_axis["qualifiers"][qualifier])
                qualifier_name = qualifier["name"]

                if qualifier_name not in qualifiers:
                    qualifiers[qualifier_name] = 0
                else:
                    # name repeated within this y axis: store it under a
                    # separate "<name>-<repeat index>" key
                    qualifiers[qualifier_name] += 1
                    count = qualifiers[qualifier_name]
                    qualifier_name = "{0}-{1}".format(qualifier_name, count)

                if qualifier_name not in record["qualifiers"].keys():
                    record["qualifier_order"].append(qualifier_name)
                    record["qualifiers"][qualifier_name] = []

                # "type" keeps the original (unsuffixed) qualifier name;
                # units, when present, are appended to the value text
                record["qualifiers"][qualifier_name].append(
                    {"type": qualifier["name"],
                     "value": str(qualifier["value"]) + (
                         ' ' + qualifier['units'] if 'units' in qualifier else ''),
                     "colspan": 1, "group": group_count})

            # attempt column merge: consecutive entries with the same type
            # and value collapse into the first one, whose colspan grows
            for qualifier in record["qualifiers"]:
                values = record["qualifiers"][qualifier]
                merged_values = []
                last_value = None
                for counter, value in enumerate(values):
                    if not last_value:
                        last_value = value
                    else:
                        if last_value["type"] == value["type"] and last_value["value"] == value["value"]:
                            last_value["colspan"] += 1
                        else:
                            merged_values.append(last_value)
                            last_value = value

                    # flush the pending entry once the last value is reached
                    if counter == len(values) - 1:
                        merged_values.append(last_value)

                record["qualifiers"][qualifier] = merged_values

        # column header is the name, followed by " [units]" when units are given
        units = y_axis['header']['units'] if 'units' in y_axis['header'] else ''
        y_header = y_axis['header']['name']
        if units:
            y_header += ' [' + units + ']'
        dependent_variable_headers.append({"name": y_header, "colspan": 1})

        # count is the row index of the value being processed
        count = 0
        too_many_y_values = False
        for value in y_axis["values"]:

            if count not in tmp_values.keys():

                # Check that number of y values does not exceed number of x values.
                for x_header in independent_variables:
                    if count > len(independent_variables[x_header]) - 1:
                        too_many_y_values = True
                # the remaining values of this y axis are not processed
                if too_many_y_values: break

                # first time this row is seen: record its x values
                x = []
                for x_header in independent_variables:
                    x.append(independent_variables[x_header][count])
                tmp_values[count] = {"x": x, "y": []}

            # same object as the entry in y_axis["values"]; changed in place
            y_record = value

            fix_nan_inf(y_record)

            y_record["group"] = group_count

            if "errors" not in y_record:
                # no errors supplied: add a zero symmetric error flagged "hide"
                y_record["errors"] = [{"symerror": 0, "hide": True}]
            else:
                # process the labels to ensure uniqueness
                observed_error_labels = {}
                for error in y_record["errors"]:
                    # errors without a label are counted under "error"
                    error_label = error.get("label", "error")

                    if error_label not in observed_error_labels:
                        observed_error_labels[error_label] = 0
                    observed_error_labels[error_label] += 1

                    # second and later occurrences get "_<occurrence number>"
                    if observed_error_labels[error_label] > 1:
                        error["label"] = error_label + "_" + str(
                            observed_error_labels[error_label])

                    # append "_1" to first error label that has a duplicate
                    if observed_error_labels[error_label] == 2:
                        for error1 in y_record["errors"]:
                            error1_label = error1.get("label", "error")
                            if error1_label == error_label:
                                error1["label"] = error1_label + "_1"
                                break

            tmp_values[count]["y"].append(y_record)
            count += 1

        group_count += 1


def generate_table_structure(table_contents):
    """
    Builds a renderable structure from the stored table structure.

    :param table_contents: dictionary describing the table, including its
        metadata, keywords, independent and dependent variables
    :return: a dictionary encompassing the qualifiers, headers and values
    """
    record = {
        "name": table_contents["name"],
        "doi": table_contents["doi"],
        "location": table_contents["location"],
        "related_tables": table_contents["related_tables"],
        "related_to_this": table_contents["related_to_this"],
        "qualifiers": {},
        "qualifier_order": [],
        "headers": [],
        "review": table_contents["review"],
        "associated_files": table_contents["associated_files"],
        "keywords": {},
        "values": [],
    }
    record["description"] = sanitize_html(table_contents["title"])

    # group keyword values under their keyword name, without repeats
    keywords = table_contents['keywords']
    if keywords is not None:
        grouped = record['keywords']
        for keyword in keywords:
            known_values = grouped.setdefault(keyword.name, [])
            if keyword.value not in known_values:
                known_values.append(keyword.value)

    rows_by_index = {}
    x_axes = OrderedDict()
    x_headers = []
    process_independent_variables(table_contents, x_axes, x_headers)
    record["x_count"] = len(x_headers)
    record["headers"].extend(x_headers)

    y_headers = []
    process_dependent_variables(0, record, table_contents,
                                rows_by_index, x_axes, y_headers)

    # merge consecutive y headers that share a name into a single header
    # whose colspan counts the merged columns
    pending = None
    for y_header in y_headers:
        if pending and pending["name"] == y_header["name"]:
            pending["colspan"] += 1
            continue
        if pending:
            record["headers"].append(pending)
        pending = y_header
    if y_headers:
        record["headers"].append(pending)

    record["values"].extend(rows_by_index.values())

    return record


def str_presenter(dumper, data):
    """
    Represents a string as a YAML scalar.

    Strings containing a newline are represented with the '|' style;
    other strings are represented without an explicit style.

    :param dumper: object providing ``represent_scalar``
    :param data: the string to represent
    :return: whatever ``dumper.represent_scalar`` returns
    """
    style_options = {'style': '|'} if "\n" in data else {}
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, **style_options)


def process_ctx(ctx, light_mode=False):
    """
    Removes display-only keys from a record context and adds download links
    to each data table.

    The context is modified in place.

    :param ctx: context dictionary.  Unless ``light_mode`` is set it must
        contain 'data_tables' (a list of dictionaries with a 'name'),
        'status' and 'record'.
    :param light_mode: if True, 'data_tables' is dropped from the context
        and no download links are generated
    :return: the same ``ctx`` dictionary
    """
    for key_to_remove in ['show_review_widget', 'show_upload_area', 'show_upload_widget',
                          'coordinators', 'is_submission_coordinator_or_admin', 'is_admin']:
        ctx.pop(key_to_remove, None)

    if light_mode:
        ctx.pop('data_tables', None)
        return ctx

    site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
    for data_table in ctx['data_tables']:
        for key_to_remove in ['review_status', 'review_flag']:
            data_table.pop(key_to_remove, None)

        # The links are built once per table, after the key removal, rather
        # than once per removed key.
        if ctx['status'] == 'finished' and ctx['record']['inspire_id']:
            _recid = 'ins{}'.format(ctx['record']['inspire_id'])
        elif 'recid' in ctx['record']:
            _recid = ctx['record']['recid']
        else:
            _recid = ctx['recid']

        # only '%' and '\' are escaped in the table name; '%' is replaced
        # first so the '%5C' produced for '\' is not escaped again
        _cleaned_table_name = data_table['name'].replace('%', '%25').replace('\\', '%5C')

        _table_url = '{0}/download/table/{1}/{2}'.format(
            site_url, _recid, _cleaned_table_name)

        data_table['data'] = {}
        for file_format in ('json', 'root', 'csv', 'yoda', 'yoda1', 'yaml'):
            data_table['data'][file_format] = '{0}/{1}'.format(_table_url, file_format)

    return ctx

1	# -*- coding: utf-8 -*-
2	#
3	# This file is part of HEPData.
4	# Copyright (C) 2016 CERN.
5	#
6	# HEPData is free software; you can redistribute it
7	# and/or modify it under the terms of the GNU General Public License as
8	# published by the Free Software Foundation; either version 2 of the
9	# License, or (at your option) any later version.
10	#
11	# HEPData is distributed in the hope that it will be
12	# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	# General Public License for more details.
15	#
16	# You should have received a copy of the GNU General Public License
17	# along with HEPData; if not, write to the
18	# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19	# MA 02111-1307, USA.
20	#
21	# In applying this license, CERN does not
22	# waive the privileges and immunities granted to it by virtue of its status
23	# as an Intergovernmental Organization or submit itself to any jurisdiction.
24
25	from flask import current_app	1✔
26	from collections import OrderedDict	1✔
27	import re	1✔
28
29	from hepdata.utils.miscellaneous import sanitize_html	1✔
30
31
32	def pad_independent_variables(table_contents):	1✔
33	"""
34	Pads out the independent variable column in the event that nothing exists.
35
36	:param table_contents:
37	:return:
38	"""
39	_dep_count = len(table_contents["dependent_variables"][0]['values'])	1✔
40	_count = 0	1✔
41	_ind_vars = {"header": {"name": "", "units": ""}, "values": []}	1✔
42	while _count < _dep_count:	1✔
43	_ind_vars["values"].append({"value": ""})	1✔
44	_count += 1	1✔
45
46	table_contents["independent_variables"].append(_ind_vars)	1✔
47
48
49	def fix_nan_inf(value):	1✔
50	"""
51	Converts NaN, +inf, and -inf values to strings.
52
53	:param value:
54	:return:
55	"""
56	keys = ['value', 'high', 'low']	1✔
57	for key in keys:	1✔
58	if key in value and str(value[key]) in current_app.config['SPECIAL_VALUES']:	1✔
59	value[key] = str(value['value'])	×
60	return value	1✔
61
62
63	def process_independent_variables(table_contents, x_axes, independent_variable_headers):	1✔
64
65	if len(table_contents["independent_variables"]) == 0 and table_contents["dependent_variables"]:	1✔
66	pad_independent_variables(table_contents)	1✔
67
68	if table_contents["independent_variables"]:	1✔
69	count = 0	1✔
70	for x_axis in table_contents["independent_variables"]:	1✔
71	units = x_axis['header']['units'] if 'units' in x_axis['header'] else ''	1✔
72	x_header = x_axis['header']['name']	1✔
73	if units:	1✔
74	x_header += ' [' + units + ']'	1✔
75
76	if x_header in x_axes:	1✔
77	# sometimes, the x headers can be the same.
78	# We must account for this.
79	x_header += '__{0}'.format(count)	×
80
81	x_axes[x_header] = []	1✔
82
83	independent_variable_headers.append(	1✔
84	{"name": x_header, "colspan": 1})
85
86	if x_axis["values"]:	1✔
87	# if x_header not in x_headers:
88
89	for value in x_axis["values"]:	1✔
90	x_axes[x_header].append(fix_nan_inf(value))	1✔
91
92	count += 1	1✔
93
94
95	def process_dependent_variables(group_count, record, table_contents,	1✔
96	tmp_values, independent_variables,
97	dependent_variable_headers):
98	for y_axis in table_contents["dependent_variables"]:	1✔
99
100	qualifiers = {}	1✔
101	if "qualifiers" in y_axis:	1✔
102	for qualifier in y_axis["qualifiers"]:	1✔
103	# colspan = len(y_axis["qualifiers"][qualifier])
104	qualifier_name = qualifier["name"]	1✔
105
106	if qualifier_name not in qualifiers:	1✔
107	qualifiers[qualifier_name] = 0	1✔
108	else:
109	qualifiers[qualifier_name] += 1	×
110	count = qualifiers[qualifier_name]	×
111	qualifier_name = "{0}-{1}".format(qualifier_name, count)	×
112
113	if qualifier_name not in record["qualifiers"].keys():	1✔
114	record["qualifier_order"].append(qualifier_name)	1✔
115	record["qualifiers"][qualifier_name] = []	1✔
116
117	record["qualifiers"][qualifier_name].append(	1✔
118	{"type": qualifier["name"],
119	"value": str(qualifier["value"]) + (
120	' ' + qualifier['units'] if 'units' in qualifier else ''),
121	"colspan": 1, "group": group_count})
122
123	# attempt column merge
124	for qualifier in record["qualifiers"]:	1✔
125	values = record["qualifiers"][qualifier]	1✔
126	merged_values = []	1✔
127	last_value = None	1✔
128	for counter, value in enumerate(values):	1✔
129	if not last_value:	1✔
130	last_value = value	1✔
131	else:
132	if last_value["type"] == value["type"] and last_value["value"] == value["value"]:	1✔
133	last_value["colspan"] += 1	1✔
134	else:
135	merged_values.append(last_value)	1✔
136	last_value = value	1✔
137
138	if counter == len(values) - 1:	1✔
139	merged_values.append(last_value)	1✔
140
141	record["qualifiers"][qualifier] = merged_values	1✔
142
143	units = y_axis['header']['units'] if 'units' in y_axis['header'] else ''	1✔
144	y_header = y_axis['header']['name']	1✔
145	if units:	1✔
146	y_header += ' [' + units + ']'	1✔
147	dependent_variable_headers.append({"name": y_header, "colspan": 1})	1✔
148
149	count = 0	1✔
150	too_many_y_values = False	1✔
151	for value in y_axis["values"]:	1✔
152
153	if count not in tmp_values.keys():	1✔
154
155	# Check that number of y values does not exceed number of x values.
156	for x_header in independent_variables:	1✔
157	if count > len(independent_variables[x_header]) - 1:	1✔
158	too_many_y_values = True	×
159	if too_many_y_values: break	1✔
160
161	x = []	1✔
162	for x_header in independent_variables:	1✔
163	x.append(independent_variables[x_header][count])	1✔
164	tmp_values[count] = {"x": x, "y": []}	1✔
165
166	y_record = value	1✔
167
168	fix_nan_inf(y_record)	1✔
169
170	y_record["group"] = group_count	1✔
171
172	if "errors" not in y_record:	1✔
173	y_record["errors"] = [{"symerror": 0, "hide": True}]	1✔
174	else:
175	# process the labels to ensure uniqueness
176	observed_error_labels = {}	1✔
177	for error in y_record["errors"]:	1✔
178	error_label = error.get("label", "error")	1✔
179
180	if error_label not in observed_error_labels:	1✔
181	observed_error_labels[error_label] = 0	1✔
182	observed_error_labels[error_label] += 1	1✔
183
184	if observed_error_labels[error_label] > 1:	1✔
185	error["label"] = error_label + "_" + str(	×
186	observed_error_labels[error_label])
187
188	# append "_1" to first error label that has a duplicate
189	if observed_error_labels[error_label] == 2:	1✔
190	for error1 in y_record["errors"]:	×
191	error1_label = error1.get("label", "error")	×
192	if error1_label == error_label:	×
193	error1["label"] = error1_label + "_1"	×
194	break	×
195
196	tmp_values[count]["y"].append(y_record)	1✔
197	count += 1	1✔
198
199	group_count += 1	1✔
200
201
202	def generate_table_structure(table_contents):	1✔
203	"""
204	Creates a renderable structure from the table structure we've defined.
205
206	:param table_contents:
207	:return: a dictionary encompassing the qualifiers, headers and values
208	"""
209
210	record = {"name": table_contents["name"], "doi": table_contents["doi"],	1✔
211	"location": table_contents["location"],
212	"related_tables" : table_contents["related_tables"],
213	"related_to_this" : table_contents["related_to_this"],
214	"qualifiers": {},
215	"qualifier_order": [], "headers": [],
216	"review": table_contents["review"],
217	"associated_files": table_contents["associated_files"],
218	"keywords": {},
219	"values": []}
220
221	record["description"] = sanitize_html(table_contents["title"])	1✔
222
223	# add in keywords
224	if table_contents['keywords'] is not None:	1✔
225	for keyword in table_contents['keywords']:	1✔
226	if keyword.name not in record['keywords']:	1✔
227	record['keywords'][keyword.name] = []	1✔
228
229	if keyword.value not in record['keywords'][keyword.name]:	1✔
230	record['keywords'][keyword.name].append(keyword.value)	1✔
231
232	tmp_values = {}	1✔
233	x_axes = OrderedDict()	1✔
234	x_headers = []	1✔
235	process_independent_variables(table_contents, x_axes, x_headers)	1✔
236	record["x_count"] = len(x_headers)	1✔
237	record["headers"] += x_headers	1✔
238
239	group_count = 0	1✔
240	yheaders = []	1✔
241
242	process_dependent_variables(group_count, record, table_contents,	1✔
243	tmp_values, x_axes, yheaders)
244
245	# attempt column merge
246	last_yheader = None	1✔
247	for counter, yheader in enumerate(yheaders):	1✔
248	if not last_yheader:	1✔
249	last_yheader = yheader	1✔
250	else:
251	if last_yheader["name"] == yheader["name"]:	1✔
252	last_yheader["colspan"] += 1	1✔
253	else:
254	record["headers"].append(last_yheader)	×
255	last_yheader = yheader	×
256	if counter == len(yheaders) - 1:	1✔
257	record["headers"].append(last_yheader)	1✔
258
259	for tmp_value in tmp_values:	1✔
260	record["values"].append(tmp_values[tmp_value])	1✔
261
262	return record	1✔
263
264
265	def str_presenter(dumper, data):	1✔
266	if "\n" in data:	1✔
267	return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')	×
268	return dumper.represent_scalar('tag:yaml.org,2002:str', data)	1✔
269
270
271	def process_ctx(ctx, light_mode=False):	1✔
272	for key_to_remove in ['show_review_widget', 'show_upload_area', 'show_upload_widget',	×
273	'coordinators', 'is_submission_coordinator_or_admin', 'is_admin']:
274	ctx.pop(key_to_remove, None)	×
275
276	if light_mode:	×
277	ctx.pop('data_tables', None)	×
278	else:
279	site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')	×
280	for data_table in ctx['data_tables']:	×
281	for key_to_remove in ['review_status', 'review_flag']:	×
282	data_table.pop(key_to_remove, None)	×
283
284	if ctx['status'] == 'finished' and ctx['record']['inspire_id']:	×
285	_recid = 'ins{}'.format(ctx['record']['inspire_id'])	×
286	elif 'recid' in ctx['record']:	×
287	_recid = ctx['record']['recid']	×
288	else:
289	_recid = ctx['recid']	×
290
291	_cleaned_table_name = data_table['name'].replace('%', '%25').replace('\\', '%5C')	×
292
293	data_table['data'] = {	×
294	'json': '{0}/download/table/{1}/{2}/json'.format(
295	site_url, _recid, _cleaned_table_name),
296	'root': '{0}/download/table/{1}/{2}/root'.format(
297	site_url, _recid, _cleaned_table_name),
298	'csv': '{0}/download/table/{1}/{2}/csv'.format(
299	site_url, _recid, _cleaned_table_name),
300	'yoda': '{0}/download/table/{1}/{2}/yoda'.format(
301	site_url, _recid, _cleaned_table_name),
302	'yoda1': '{0}/download/table/{1}/{2}/yoda1'.format(
303	site_url, _recid, _cleaned_table_name),
304	'yaml': '{0}/download/table/{1}/{2}/yaml'.format(
305	site_url, _recid, _cleaned_table_name)}
306
UNCOV 307	return ctx	×

HEPData / hepdata / 6877611252

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous