• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

EQAR / eqar_backend / 19719e32-715a-4e63-ae44-c38239f50673

04 Mar 2025 05:08AM UTC coverage: 85.927% (-0.02%) from 85.948%
19719e32-715a-4e63-ae44-c38239f50673

push

circleci

JoshBone
Update populator.py

1 of 4 new or added lines in 1 file covered. (25.0%)

26 existing lines in 6 files now uncovered.

9849 of 11462 relevant lines covered (85.93%)

0.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.95
/submissionapi/csv_functions/csv_handler.py
1
import csv
1✔
2

3
import itertools
1✔
4
import re
1✔
5

6
from submissionapi.csv_functions.csv_insensitive_dict_reader import DictReaderInsensitive
1✔
7

8

9
class CSVHandler:
    """
    Class to handle CSV upload, transform it to a submission request object.

    Each CSV row is converted into one nested dictionary. Column names are
    matched against the regular expressions in ``FIELDS``; bracketed numbers
    in a column name (``institution[2].country[1]``) are 1-based positions
    inside the generated lists. A cell holding ``'-'`` is skipped for every
    indexed column. The cleaned records are collected in
    ``submission_data``.
    """
    # Column-name patterns, grouped by target structure. A key containing
    # '__' describes a list nested inside each item of the list named by the
    # part before '__'.
    FIELDS = {
        'reports': [
            r'agency',
            r'contributing_agencies\[\d+\]',
            r'report_id',
            r'local_identifier',
            r'status',
            r'decision',
            r'summary',
            r'valid_from',
            r'valid_to',
            r'date_format',
            r'other_comment'
        ],
        'activities': [
            r'activities\[\d+\]\.activity',
            r'activities\[\d+\]\.local_identifier',
            r'activities\[\d+\]\.agency',
        ],
        'report_links': [
            r'link\[\d+\]',
            r'link_display_name\[\d+\]'
        ],
        'report_files': [
            r'file\[\d+\]\.original_location',
            r'file\[\d+\]\.display_name',
        ],
        'report_files__report_language': [
            r'file\[\d+\]\.report_language\[\d+\]',
        ],
        'institutions': [
            r'institution\[\d+\]\.deqar_id',
            r'institution\[\d+\]\.eter_id',
            r'institution\[\d+\]\.identifier',
            r'institution\[\d+\]\.resource'
        ],
        'institutions__alternative_names': [
            r'institution\[\d+\]\.name_alternative\[\d+\]',
            r'institution\[\d+\]\.name_alternative_transliterated\[\d+\]',
        ],
        'institutions__locations': [
            r'institution\[\d+\]\.country\[\d+\]',
            r'institution\[\d+\]\.city\[\d+\]',
            r'institution\[\d+\]\.latitude\[\d+\]',
            r'institution\[\d+\]\.longitude\[\d+\]',
        ],
        'institutions__qf_ehea_levels': [
            r'institution\[\d+\]\.qf_ehea_level\[\d+\]',
        ],
        'platforms': [
            r'platform\[\d+\]\.deqar_id',
            r'platform\[\d+\]\.eter_id',
            r'platform\[\d+\]\.identifier',
            r'platform\[\d+\]\.resource'
        ],
        'platforms__alternative_names': [
            r'platform\[\d+\]\.name_alternative\[\d+\]',
            r'platform\[\d+\]\.name_alternative_transliterated\[\d+\]',
        ],
        'platforms__locations': [
            r'platform\[\d+\]\.country\[\d+\]',
            r'platform\[\d+\]\.city\[\d+\]',
            r'platform\[\d+\]\.latitude\[\d+\]',
            r'platform\[\d+\]\.longitude\[\d+\]',
        ],
        'platforms__qf_ehea_levels': [
            r'platform\[\d+\]\.qf_ehea_level\[\d+\]',
        ],
        'programmes': [
            r'programme\[\d+\]\.name_primary',
            r'programme\[\d+\]\.qualification_primary',
            r'programme\[\d+\]\.nqf_level',
            r'programme\[\d+\]\.qf_ehea_level',
            r'programme\[\d+\]\.degree_outcome',
            r'programme\[\d+\]\.workload_ects',
            r'programme\[\d+\]\.learning_outcome_description',
            r'programme\[\d+\]\.field_study',
            r'programme\[\d+\]\.assessment_certification',
        ],
        'programmes__identifiers': [
            r'programme\[\d+\]\.identifier\[\d+\]',
            r'programme\[\d+\]\.resource\[\d+\]',
        ],
        'programmes__alternative_names': [
            r'programme\[\d+\]\.name_alternative\[\d+\]',
            r'programme\[\d+\]\.qualification_alternative\[\d+\]',
        ],
        'programmes__countries': [
            r'programme\[\d+\]\.country\[\d+\]',
        ],
        'programmes__learning_outcomes': [
            r'programme\[\d+\]\.learning_outcome\[\d+\]',
        ]
    }

    def __init__(self, csvfile):
        """
        Store the uploaded file and reset the processing state.

        :param csvfile: seekable text file object holding the CSV content.
        """
        self.csvfile = csvfile
        self.submission_data = []   # one cleaned record per processed row
        self.report_record = {}     # record under construction for the current row
        self.error = False
        self.error_message = ""
        self.dialect = None         # set by _csv_is_valid() when sniffing succeeds
        self.reader = None          # set by _read_csv()

    def handle(self):
        """
        Validate the file and convert every row into a submission record.

        When the dialect cannot be detected, ``error`` and ``error_message``
        are set and no row is processed.
        """
        if not self._csv_is_valid():
            self.error = True
            self.error_message = 'The CSV file appears to be invalid.'
            return

        self._read_csv()
        for row in self.reader:
            self._create_report(row)
            self._create_activities(row)
            self._create_report_links(row)
            self._create_report_files(row)
            self._create_institutions(row)
            self._create_institutions_alternative_names(row)
            self._create_institutions_locations(row)
            self._create_institutions_qf_ehea_levels(row)
            self._create_platforms(row)
            self._create_platforms_alternative_names(row)
            self._create_platforms_locations(row)
            self._create_platforms_qf_ehea_levels(row)
            self._create_programmes(row)
            self._create_programmes_alternative_names(row)
            self._create_programmes_identifiers(row)
            self._create_programmes_countries(row)
            self._create_learning_outcomes(row)
            self._clear_submission_data()

    def _csv_is_valid(self):
        """
        Sniff the CSV dialect of the whole file.

        :return: True when a dialect was detected (stored in ``dialect``),
                 False when the sniffer raised ``csv.Error``.
        """
        try:
            self.csvfile.seek(0)
            self.dialect = csv.Sniffer().sniff(self.csvfile.read(), delimiters=['\t', ',', ';'])
        except csv.Error:
            return False
        return True

    def _read_csv(self):
        """Rewind the file and create the row reader."""
        self.csvfile.seek(0)
        # NOTE(review): the dialect sniffed in _csv_is_valid() is not handed
        # to the reader here - confirm whether DictReaderInsensitive is meant
        # to receive dialect=self.dialect for tab/semicolon separated files.
        self.reader = DictReaderInsensitive(self.csvfile)

    def _matching_columns(self, pattern):
        """Return the CSV column names matching ``pattern``, sorted case-insensitively."""
        matcher = re.compile(pattern).match
        return sorted(filter(matcher, self.reader.fieldnames), key=str.lower)

    def _highest_index(self, patterns, position):
        """
        Return the largest bracketed index used by columns matching ``patterns``.

        :param patterns: iterable of column-name regular expressions.
        :param position: which bracketed index of the matched text to read;
                         0 for the first level, -1 for the innermost level.
        :return: the highest index found, 0 when no column matches.
        """
        highest = 0
        for pattern in patterns:
            matcher = re.compile(pattern).match
            for column in self.reader.fieldnames:
                found = matcher(column)
                if found:
                    # Only look at the text the pattern matched, so the
                    # indexes correspond one-to-one to the pattern's levels.
                    indexes = re.findall(r"\[(\d+)\]", found.group(0))
                    highest = max(highest, int(indexes[position]))
        return highest

    def _create_report(self, row):
        """
        Copy the report-level columns of ``row`` into the current record.

        ``contributing_agencies[n]`` columns are gathered into one list; for
        every other pattern the first matching column (case-insensitive
        order) is copied under its own column name.
        """
        for pattern in self.FIELDS['reports']:
            columns = self._matching_columns(pattern)
            if not columns:
                continue

            if 'contributing_agencies' in pattern:
                self.report_record['contributing_agencies'] = [row[column] for column in columns]
            else:
                self.report_record[columns[0]] = row[columns[0]]

    def _create_activities(self, row):
        """Build the 'activities' list of the current record."""
        self._create_first_level_placeholder(['activities'])
        self._create_first_level_values('activities', row, dotted=True)

    def _create_institutions(self, row):
        """Build the 'institutions' list (first-level values only)."""
        self._create_first_level_placeholder(['institutions',
                                              'institutions__alternative_names',
                                              'institutions__locations',
                                              'institutions__qf_ehea_levels'])
        self._create_first_level_values('institutions', row, dotted=True)

    def _create_institutions_alternative_names(self, row):
        """Fill 'alternative_names' of every institution."""
        self._create_second_level_placeholder('institutions__alternative_names', dictkey=True)
        self._create_second_level_values('institutions__alternative_names', row, dictkey=True)

    def _create_institutions_locations(self, row):
        """Fill 'locations' of every institution."""
        self._create_second_level_placeholder('institutions__locations', dictkey=True)
        self._create_second_level_values('institutions__locations', row, dictkey=True)

    def _create_institutions_qf_ehea_levels(self, row):
        """Fill the plain 'qf_ehea_levels' list of every institution."""
        self._create_second_level_placeholder('institutions__qf_ehea_levels')
        self._create_second_level_values('institutions__qf_ehea_levels', row)

    def _create_platforms(self, row):
        """Build the 'platforms' list (first-level values only)."""
        self._create_first_level_placeholder(['platforms',
                                              'platforms__alternative_names',
                                              'platforms__locations',
                                              'platforms__qf_ehea_levels'])
        self._create_first_level_values('platforms', row, dotted=True)

    def _create_platforms_alternative_names(self, row):
        """Fill 'alternative_names' of every platform."""
        self._create_second_level_placeholder('platforms__alternative_names', dictkey=True)
        self._create_second_level_values('platforms__alternative_names', row, dictkey=True)

    def _create_platforms_locations(self, row):
        """Fill 'locations' of every platform."""
        self._create_second_level_placeholder('platforms__locations', dictkey=True)
        self._create_second_level_values('platforms__locations', row, dictkey=True)

    def _create_platforms_qf_ehea_levels(self, row):
        """Fill the plain 'qf_ehea_levels' list of every platform."""
        self._create_second_level_placeholder('platforms__qf_ehea_levels')
        self._create_second_level_values('platforms__qf_ehea_levels', row)

    def _create_programmes(self, row):
        """Build the 'programmes' list (first-level values only)."""
        self._create_first_level_placeholder(['programmes',
                                              'programmes__identifiers',
                                              'programmes__alternative_names',
                                              'programmes__countries',
                                              'programmes__learning_outcomes'])
        self._create_first_level_values('programmes', row, dotted=True)

    def _create_programmes_identifiers(self, row):
        """Fill 'identifiers' of every programme."""
        self._create_second_level_placeholder('programmes__identifiers', dictkey=True)
        self._create_second_level_values('programmes__identifiers', row, dictkey=True)

    def _create_programmes_alternative_names(self, row):
        """Fill 'alternative_names' of every programme."""
        self._create_second_level_placeholder('programmes__alternative_names', dictkey=True)
        self._create_second_level_values('programmes__alternative_names', row, dictkey=True)

    def _create_programmes_countries(self, row):
        """Fill the plain 'countries' list of every programme."""
        self._create_second_level_placeholder('programmes__countries')
        self._create_second_level_values('programmes__countries', row)

    def _create_learning_outcomes(self, row):
        """Fill the plain 'learning_outcomes' list of every programme."""
        self._create_second_level_placeholder('programmes__learning_outcomes')
        self._create_second_level_values('programmes__learning_outcomes', row)

    def _create_report_links(self, row):
        """Build the 'report_links' list of the current record."""
        self._create_first_level_placeholder(['report_links'])
        self._create_first_level_values('report_links', row)

    def _create_report_files(self, row):
        """Build the 'report_files' list including each file's languages."""
        self._create_first_level_placeholder(['report_files', 'report_files__report_language'])
        self._create_first_level_values('report_files', row, dotted=True)

        self._create_second_level_placeholder('report_files__report_language')
        self._create_second_level_values('report_files__report_language', row)

    def _create_first_level_placeholder(self, field_key_array):
        """
        Reset a first-level list and pre-fill it with empty dictionaries.

        :param field_key_array: ``FIELDS`` keys whose columns belong to the
                                list; the part before '__' of the first key
                                names the list inside the record.

        The list gets as many dictionaries as the highest first index found
        in any matching column, so later writes by position cannot miss.
        """
        wrapper = field_key_array[0].split('__')[0]
        patterns = [pattern for key in field_key_array for pattern in self.FIELDS[key]]
        count = self._highest_index(patterns, 0)
        self.report_record[wrapper] = [{} for _ in range(count)]

    def _create_first_level_values(self, wrapper, row, dotted=False):
        """
        Write the first-level values of ``row`` into the list ``wrapper``.

        :param wrapper: ``FIELDS`` key and name of the list in the record.
        :param row: mapping of column name to cell value.
        :param dotted: when True the key is the part after the first '.' of
                       the column name; otherwise the column name itself,
                       both with the '[n]' index removed.
        """
        for pattern in self.FIELDS[wrapper]:
            for column in self._matching_columns(pattern):
                value = row[column]
                if value == '-':
                    continue

                bracket = re.search(r"\[(\d+)\]", column)
                key = column.replace(bracket.group(0), "")
                if dotted:
                    key = key.split('.')[1]
                # Column indexes are 1-based, list positions 0-based.
                position = int(bracket.group(1)) - 1
                self.report_record[wrapper][position][key] = value

    def _create_second_level_placeholder(self, field_key, dictkey=None):
        """
        Reset the nested list of every first-level item.

        :param field_key: ``FIELDS`` key of the form ``<first>__<nested>``.
        :param dictkey: when truthy the nested list holds dictionaries and is
                        pre-filled with one empty dictionary per second-level
                        index; otherwise it is left empty for appending.
        """
        first_level_wrapper_name, wrapper = field_key.split('__')
        first_level_wrapper = self.report_record[first_level_wrapper_name]

        # The size must come from the LAST bracketed index of the column
        # (the nested position). Reading the first one, i.e. the parent's
        # index, makes the later positional write fail with IndexError as
        # soon as a parent has more nested entries than there are parents.
        count = self._highest_index(self.FIELDS[field_key], -1) if dictkey else 0

        for item in first_level_wrapper:
            item[wrapper] = [{} for _ in range(count)]

    def _create_second_level_values(self, field_key, row, dictkey=None):
        """
        Write the second-level values of ``row`` into the nested lists.

        :param field_key: ``FIELDS`` key of the form ``<first>__<nested>``.
        :param row: mapping of column name to cell value.
        :param dictkey: when truthy the value is stored under its field name
                        in the dictionary at the second index; otherwise the
                        value is appended to the nested list.
        """
        first_level_wrapper_name, wrapper = field_key.split('__')
        first_level_wrapper = self.report_record[first_level_wrapper_name]

        for pattern in self.FIELDS[field_key]:
            for column in self._matching_columns(pattern):
                value = row[column]
                if value == '-':
                    continue

                parent_part, nested_part = column.split('.')
                parent_position = int(re.search(r"\d+", parent_part).group()) - 1
                nested_list = first_level_wrapper[parent_position][wrapper]

                if dictkey:
                    bracket = re.search(r"\[(\d+)\]", nested_part)
                    key = nested_part.replace(bracket.group(0), "")
                    nested_list[int(bracket.group(1)) - 1][key] = value
                else:
                    nested_list.append(value)

    def _clear_submission_data(self):
        """Append the cleaned copy of the current record to ``submission_data``."""
        self.submission_data.append(self.clean_empty(self.report_record))

    def clean_empty(self, d):
        """
        Return a copy of ``d`` with every falsy value removed recursively.

        :param d: a dictionary, a list or any other value.
        :return: dictionaries and lists are rebuilt without falsy entries
                 (empty strings, empty containers, ...); any other value is
                 returned unchanged.
        """
        if isinstance(d, list):
            cleaned_items = (self.clean_empty(item) for item in d)
            return [item for item in cleaned_items if item]
        if isinstance(d, dict):
            cleaned_pairs = ((key, self.clean_empty(value)) for key, value in d.items())
            return {key: value for key, value in cleaned_pairs if value}
        return d
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc