• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

EQAR / eqar_backend / e01b66e3-7b01-4ac2-bdbe-46705f1eec5e

30 Mar 2025 08:23PM UTC coverage: 87.665% (+11.4%) from 76.228%
e01b66e3-7b01-4ac2-bdbe-46705f1eec5e

push

circleci

web-flow
Merge pull request #537 from EQAR/development

fix institution index signal for Meilisearch

8 of 9 new or added lines in 1 file covered. (88.89%)

288 existing lines in 28 files now uncovered.

10255 of 11698 relevant lines covered (87.66%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.8
/submissionapi/csv_functions/csv_handler.py
1
import csv
1✔
2
import re
1✔
3

4
from submissionapi.csv_functions.csv_insensitive_dict_reader import DictReaderInsensitive
1✔
5

6

7
class CSVHandler:
    """
    Turn an uploaded CSV file into a list of submission request objects.

    Every CSV row becomes one nested dict (a "report record"). Column names
    encode the nesting: ``programme[2].identifier[1]`` is the first identifier
    of the second programme. Indexes in column names are 1-based. The cleaned
    record of every row is appended to ``submission_data``.
    """
    # Regular expressions (matched from the start of the column name) for the
    # columns that belong to each section of the record. Keys containing
    # ``__`` describe second-level lists nested inside a first-level wrapper.
    FIELDS = {
        'reports': [
            r'agency',
            r'contributing_agencies\[\d+\]',
            r'report_id',
            r'local_identifier',
            r'status',
            r'decision',
            r'summary',
            r'valid_from',
            r'valid_to',
            r'date_format',
            r'other_comment'
        ],
        'activities': [
            r'activities\[\d+\]\.id',
            r'activities\[\d+\]\.local_identifier',
            r'activities\[\d+\]\.agency',
            r'activities\[\d+\]\.group',
        ],
        'report_links': [
            r'link\[\d+\]',
            r'link_display_name\[\d+\]'
        ],
        'report_files': [
            r'file\[\d+\]\.original_location',
            r'file\[\d+\]\.display_name',
            r'file\[\d+\]\.file',
            r'file\[\d+\]\.file_name',
        ],
        'report_files__report_language': [
            r'file\[\d+\]\.report_language\[\d+\]',
        ],
        'institutions': [
            r'institution\[\d+\]\.deqar_id',
            r'institution\[\d+\]\.eter_id',
            r'institution\[\d+\]\.identifier',
            r'institution\[\d+\]\.resource'
        ],
        'platforms': [
            r'platform\[\d+\]\.deqar_id',
            r'platform\[\d+\]\.eter_id',
            r'platform\[\d+\]\.identifier',
            r'platform\[\d+\]\.resource'
        ],
        'programmes': [
            r'programme\[\d+\]\.name_primary',
            r'programme\[\d+\]\.qualification_primary',
            r'programme\[\d+\]\.nqf_level',
            r'programme\[\d+\]\.qf_ehea_level',
            r'programme\[\d+\]\.degree_outcome',
            r'programme\[\d+\]\.workload_ects',
            r'programme\[\d+\]\.learning_outcome_description',
            r'programme\[\d+\]\.field_study',
            r'programme\[\d+\]\.assessment_certification',
        ],
        'programmes__identifiers': [
            r'programme\[\d+\]\.identifier\[\d+\]',
            r'programme\[\d+\]\.resource\[\d+\]',
        ],
        'programmes__alternative_names': [
            r'programme\[\d+\]\.name_alternative\[\d+\]',
            r'programme\[\d+\]\.qualification_alternative\[\d+\]',
        ],
        'programmes__countries': [
            r'programme\[\d+\]\.country\[\d+\]',
        ],
        'programmes__learning_outcomes': [
            r'programme\[\d+\]\.learning_outcome\[\d+\]',
        ]
    }

    def __init__(self, csvfile):
        """Store the file object and initialise the empty parsing state."""
        self.csvfile = csvfile
        self.submission_data = []
        self.report_record = {}
        self.error = False
        self.error_message = ""
        self.dialect = None
        self.reader = None

    def handle(self):
        """
        Parse the CSV file and fill ``submission_data`` with one record per row.

        When the file cannot be sniffed as CSV, nothing is parsed; ``error`` is
        set to True and ``error_message`` describes the problem.
        """
        if self._csv_is_valid():
            self._read_csv()
            for row in self.reader:
                self._create_report(row)
                self._create_activities(row)
                self._create_report_links(row)
                self._create_report_files(row)
                self._create_institutions(row)
                self._create_platforms(row)
                self._create_programmes(row)
                self._create_programmes_alternative_names(row)
                self._create_programmes_identifiers(row)
                self._create_programmes_countries(row)
                self._create_learning_outcomes(row)
                self._clear_submission_data()
        else:
            self.error = True
            self.error_message = 'The CSV file appears to be invalid.'

    def _csv_is_valid(self):
        """Return True when ``csv.Sniffer`` can detect a dialect for the file."""
        try:
            self.csvfile.seek(0)
            self.dialect = csv.Sniffer().sniff(self.csvfile.read(), delimiters=['\t', ',', ';'])
            return True
        except csv.Error:
            return False

    def _read_csv(self):
        """Rewind the file and create the row reader."""
        self.csvfile.seek(0)
        # NOTE(review): the dialect sniffed in _csv_is_valid() is stored in
        # self.dialect but is not passed to the reader here - confirm whether
        # DictReaderInsensitive is expected to receive it.
        self.reader = DictReaderInsensitive(self.csvfile)

    @staticmethod
    def _max_bracket_index(pattern, csv_fields, level=0):
        """
        Return the largest ``[n]`` index among the columns matching ``pattern``.

        ``level`` selects which bracketed index of the column name is read:
        0 for the first one (``programme[n]``), 1 for the second one
        (``programme[1].identifier[n]``). Returns 0 when no column matches.
        """
        regex = re.compile(pattern)
        max_index = 0
        for csv_field in csv_fields:
            if regex.match(csv_field):
                indexes = re.findall(r"\[(\d+)\]", csv_field)
                if len(indexes) > level:
                    max_index = max(max_index, int(indexes[level]))
        return max_index

    def _create_report(self, row):
        """Copy the flat, report-level columns of ``row`` into the record."""
        csv_fields = self.reader.fieldnames
        for field in self.FIELDS['reports']:
            r = re.compile(field)
            rematch = sorted(filter(r.match, csv_fields), key=str.lower)

            if rematch:
                if 'contributing_agencies' in field:
                    # The only report-level column that may repeat: collect
                    # every indexed column into one list.
                    self.report_record['contributing_agencies'] = [
                        row[column] for column in rematch
                    ]
                else:
                    self.report_record[rematch[0]] = row[rematch[0]]

    def _create_activities(self, row):
        """Build the ``activities`` list of the record."""
        self._create_first_level_placeholder(['activities'])
        self._create_first_level_values('activities', row, dotted=True)

    def _create_institutions(self, row):
        """Build the ``institutions`` list of the record."""
        self._create_first_level_placeholder(['institutions'])
        self._create_first_level_values('institutions', row, dotted=True)

    def _create_platforms(self, row):
        """Build the ``platforms`` list of the record."""
        self._create_first_level_placeholder(['platforms'])
        self._create_first_level_values('platforms', row, dotted=True)

    def _create_programmes(self, row):
        """Build the ``programmes`` list of the record (first-level values only)."""
        # The second-level keys are included so that a programme which only
        # has nested columns still gets a placeholder dict.
        self._create_first_level_placeholder(['programmes',
                                              'programmes__identifiers',
                                              'programmes__alternative_names',
                                              'programmes__countries',
                                              'programmes__learning_outcomes'])
        self._create_first_level_values('programmes', row, dotted=True)

    def _create_programmes_identifiers(self, row):
        """Fill the ``identifiers`` list of every programme."""
        self._create_second_level_placeholder('programmes__identifiers', dictkey=True)
        self._create_second_level_values('programmes__identifiers', row, dictkey=True)

    def _create_programmes_alternative_names(self, row):
        """Fill the ``alternative_names`` list of every programme."""
        self._create_second_level_placeholder('programmes__alternative_names', dictkey=True)
        self._create_second_level_values('programmes__alternative_names', row, dictkey=True)

    def _create_programmes_countries(self, row):
        """Fill the ``countries`` list of every programme."""
        self._create_second_level_placeholder('programmes__countries')
        self._create_second_level_values('programmes__countries', row)

    def _create_learning_outcomes(self, row):
        """Fill the ``learning_outcomes`` list of every programme."""
        self._create_second_level_placeholder('programmes__learning_outcomes')
        self._create_second_level_values('programmes__learning_outcomes', row)

    def _create_report_links(self, row):
        """Build the ``report_links`` list of the record."""
        self._create_first_level_placeholder(['report_links'])
        self._create_first_level_values('report_links', row)

    def _create_report_files(self, row):
        """Build the ``report_files`` list, including each file's languages."""
        self._create_first_level_placeholder(['report_files', 'report_files__report_language'])
        self._create_first_level_values('report_files', row, dotted=True)

        self._create_second_level_placeholder('report_files__report_language')
        self._create_second_level_values('report_files__report_language', row)

    def _create_first_level_placeholder(self, field_key_array):
        """
        Reset a first-level wrapper list and pre-fill it with empty dicts.

        The wrapper name is taken from the first key in ``field_key_array``
        (the part before ``__``). One empty dict is created for every index up
        to the highest first-level index found in the CSV header for any of
        the listed field groups.
        """
        wrapper = field_key_array[0].split('__')[0]
        csv_fields = self.reader.fieldnames

        fields = []
        for fk in field_key_array:
            fields += self.FIELDS[fk]

        # Create wrapper
        placeholders = self.report_record[wrapper] = []

        for field in fields:
            max_index = self._max_bracket_index(field, csv_fields, level=0)

            # Create placeholders that do not exist yet
            while len(placeholders) < max_index:
                placeholders.append({})

    def _create_first_level_values(self, wrapper, row, dotted=False):
        """
        Copy the values of ``row`` into the placeholder dicts of ``wrapper``.

        Cells containing ``'-'`` are skipped. With ``dotted=True`` the key is
        the part of the column name after the dot (``file[1].display_name``
        becomes ``display_name``); otherwise it is the column name with the
        index removed (``link[1]`` becomes ``link``).
        """
        csv_fields = self.reader.fieldnames
        for field in self.FIELDS[wrapper]:
            r = re.compile(field)
            rematch = sorted(filter(r.match, csv_fields), key=str.lower)

            for fld in rematch:
                if row[fld] != '-':
                    bracket = re.search(r"\[\d+\]", fld).group()
                    key = fld.replace(bracket, "")
                    # Column indexes are 1-based, list positions 0-based.
                    position = int(re.search(r"\d+", bracket).group()) - 1
                    if dotted:
                        key = key.split('.')[1]
                    self.report_record[wrapper][position][key] = row[fld]

    def _create_second_level_placeholder(self, field_key, dictkey=None):
        """
        Reset the nested list named by ``field_key`` in every first-level item.

        ``field_key`` has the form ``<first_level>__<nested>``. With
        ``dictkey`` set, the nested list is pre-filled with one empty dict per
        second-level index found in the CSV header, so that
        ``_create_second_level_values`` can assign into it by position.
        """
        csv_fields = self.reader.fieldnames
        first_level_wrapper_name, wrapper = field_key.split('__')

        # The required size depends on the SECOND bracketed index of the
        # column name (programme[1].identifier[3] -> 3). Reading the first
        # index instead under-sizes the list whenever an item has more nested
        # entries than there are first-level items.
        max_index = 0
        if dictkey:
            for field in self.FIELDS[field_key]:
                max_index = max(
                    max_index,
                    self._max_bracket_index(field, csv_fields, level=1)
                )

        # Create second level wrapper
        for first_level_wrapper_item in self.report_record[first_level_wrapper_name]:
            # Unused placeholders stay empty and are dropped by clean_empty().
            first_level_wrapper_item[wrapper] = [{} for _ in range(max_index)]

    def _create_second_level_values(self, field_key, row, dictkey=None):
        """
        Copy the nested values of ``row`` into the second-level lists.

        Cells containing ``'-'`` are skipped. With ``dictkey`` set, each value
        is stored under its field name in the dict at the second-level index;
        otherwise the value is appended to a plain list.
        """
        csv_fields = self.reader.fieldnames
        first_level_wrapper_name, wrapper = field_key.split('__')
        first_level_wrapper = self.report_record[first_level_wrapper_name]

        for field in self.FIELDS[field_key]:
            r = re.compile(field)
            rematch = sorted(filter(r.match, csv_fields), key=str.lower)

            for fld in rematch:
                if row[fld] != '-':
                    [field01, field02] = fld.split('.')
                    index01 = int(re.search(r"\d+", field01).group())

                    index02 = re.search(r"\[\d+\]", field02).group()
                    field02 = field02.replace(index02, "")
                    index02 = int(re.search(r"\d+", index02).group())
                    if dictkey:
                        first_level_wrapper[index01-1][wrapper][index02-1][field02] = row[fld]
                    else:
                        first_level_wrapper[index01-1][wrapper].append(row[fld])

    def _clear_submission_data(self):
        """Append a cleaned copy of the current record to ``submission_data``."""
        self.submission_data.append(self.clean_empty(self.report_record))

    def clean_empty(self, d):
        """
        Return a copy of ``d`` with all falsy values removed, recursively.

        Lists and dicts are rebuilt without empty strings, empty containers
        and other falsy members; any other value is returned unchanged.
        """
        if not isinstance(d, (dict, list)):
            return d
        if isinstance(d, list):
            return [v for v in (self.clean_empty(v) for v in d) if v]
        return {k: v for k, v in ((k, self.clean_empty(v)) for k, v in d.items()) if v}
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc