• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenDataServices / flatten-tool / 10202339569

01 Aug 2024 04:44PM UTC coverage: 95.709%. Remained the same
10202339569

push

github

Bjwebb
errors: Use custom exceptions

https://github.com/OpenDataServices/flatten-tool/issues/450

This makes it easier to disambiguate errors deliberately raised by
flatten-tool versus those from other sources. I've left alone a few
exceptions that flatten-tool raises, but which we don't expect to
happen, so didn't seem to be in the same category.

20 of 35 new or added lines in 6 files covered. (57.14%)

57 existing lines in 5 files now uncovered.

3390 of 3542 relevant lines covered (95.71%)

11.42 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.91
/flattentool/input.py
1
"""
2
This file has classes describing input from spreadsheets.
3

4
"""
5

6
from __future__ import print_function, unicode_literals
12✔
7

8
import datetime
12✔
9
import os
12✔
10
from collections import OrderedDict, UserDict
12✔
11
from csv import DictReader
12✔
12
from csv import reader as csvreader
12✔
13
from decimal import Decimal, InvalidOperation
12✔
14
from warnings import warn
12✔
15

16
import openpyxl
12✔
17
import pytz
12✔
18

19
try:
12✔
20
    import geojson
12✔
21
    import shapely.wkt
12✔
22

23
    SHAPELY_AND_GEOJSON_LIBRARIES_AVAILABLE = True
12✔
24
except ImportError:
12✔
25
    SHAPELY_AND_GEOJSON_LIBRARIES_AVAILABLE = False
12✔
26

27
from openpyxl.utils.cell import get_column_letter
12✔
28

29
from flattentool.exceptions import (
12✔
30
    DataErrorWarning,
31
    FlattenToolError,
32
    FlattenToolValueError,
33
    FlattenToolWarning,
34
)
35
from flattentool.i18n import _
12✔
36
from flattentool.lib import isint, parse_sheet_configuration
12✔
37
from flattentool.ODSReader import ODSReader
12✔
38

39
try:
12✔
40
    from zipfile import BadZipFile
12✔
UNCOV
41
except ImportError:
×
UNCOV
42
    from zipfile import BadZipfile as BadZipFile
×
43

44

45
class Cell:
    """A single spreadsheet cell: its value plus where it came from.

    ``cell_location`` is a tuple describing the cell's origin (sheet name,
    column, row, heading); ``sub_cells`` collects other cells that were
    later merged into this one.
    """

    def __init__(self, cell_value, cell_location):
        self.cell_value, self.cell_location = cell_value, cell_location
        self.sub_cells = []
50

51

52
# Avoid _csv.Error "line contains NUL" in Python < 3.11.
class NullCharacterFilter:
    """Iterator wrapper that strips NUL characters from each line of *file*."""

    def __init__(self, file):
        self.file = file

    def __iter__(self):
        return self

    def __next__(self):
        # Pull the next raw line and drop any embedded null characters
        # before handing it to the csv reader.
        line = next(self.file)
        return line.replace("\0", "")
65

66

67
def convert_type(type_string, value, timezone=pytz.timezone("UTC"), convert_flags=None):
    """
    Convert a single raw spreadsheet ``value`` according to ``type_string``.

    Empty values (``""`` or ``None``) always convert to ``None``. For each
    recognised type, values that cannot be converted emit a
    ``DataErrorWarning`` and are returned as plain strings rather than
    raising. An unrecognised ``type_string`` raises
    ``FlattenToolValueError``.

    ``convert_flags`` enables optional behaviour (currently ``"wkt"`` for
    geojson conversion). It defaults to ``None`` (treated as ``{}``) rather
    than a shared mutable ``{}`` default.
    """
    if convert_flags is None:
        convert_flags = {}
    if value == "" or value is None:
        return None
    if type_string == "number":
        try:
            return Decimal(value)
        except (TypeError, ValueError, InvalidOperation):
            warn(
                _(
                    'Non-numeric value "{}" found in number column, returning as string instead.'
                ).format(value),
                DataErrorWarning,
            )
            return str(value)
    elif type_string == "integer":
        try:
            return int(value)
        except (TypeError, ValueError):
            warn(
                _(
                    'Non-integer value "{}" found in integer column, returning as string instead.'
                ).format(value),
                DataErrorWarning,
            )
            return str(value)
    elif type_string == "boolean":
        value = str(value)
        if value.lower() in ["true", "1"]:
            return True
        elif value.lower() in ["false", "0"]:
            return False
        else:
            warn(
                _(
                    'Unrecognised value for boolean: "{}", returning as string instead'
                ).format(value),
                DataErrorWarning,
            )
            return str(value)
    elif type_string in ("array", "array_array", "string_array", "number_array"):
        value = str(value)
        # ";" separates top-level items; "," separates items of a nested array.
        if type_string == "number_array":
            try:
                if "," in value:
                    return [
                        [Decimal(y) for y in x.split(",")] for x in value.split(";")
                    ]
                else:
                    return [Decimal(x) for x in value.split(";")]
            except (TypeError, ValueError, InvalidOperation):
                warn(
                    _(
                        'Non-numeric value "{}" found in number array column, returning as string array instead).'
                    ).format(value),
                    DataErrorWarning,
                )
                # Deliberate fall-through to the string-array handling below.
        if "," in value:
            return [x.split(",") for x in value.split(";")]
        else:
            return value.split(";")
    elif type_string == "string":
        # NOTE: exact type check (not isinstance) — subclasses of datetime
        # are stringified rather than localized; presumably intentional.
        if type(value) == datetime.datetime:
            return timezone.localize(value).isoformat()
        return str(value)
    elif type_string == "date":
        if type(value) == datetime.datetime:
            return value.date().isoformat()
        return str(value)
    elif convert_flags.get("wkt") and type_string == "geojson":
        if SHAPELY_AND_GEOJSON_LIBRARIES_AVAILABLE:
            try:
                geom = shapely.wkt.loads(value)
            except shapely.errors.GEOSException as e:
                warn(
                    _(
                        'An invalid WKT string was supplied "{value}", the message from the parser was: {parser_msg}'
                    ).format(value=value, parser_msg=str(e)),
                    DataErrorWarning,
                )
                # Invalid WKT converts to None (after warning).
                return
            feature = geojson.Feature(geometry=geom, properties={})
            return feature.geometry
        else:
            warn(
                "Install flattentool's optional geo dependencies to use geo features.",
                FlattenToolWarning,
            )
            return str(value)
    elif type_string == "":
        # Untyped column: ISO-format datetimes, collapse whole floats to
        # ints, pass ints through, stringify everything else.
        if type(value) == datetime.datetime:
            return timezone.localize(value).isoformat()
        if type(value) == float and int(value) == value:
            return int(value)
        return value if type(value) in [int] else str(value)
    else:
        raise FlattenToolValueError('Unrecognised type: "{}"'.format(type_string))
163

164

165
def warnings_for_ignored_columns(v, extra_message):
    """
    Emit a ``DataErrorWarning`` for every ``Cell`` reachable under ``v``,
    explaining (via ``extra_message``) why the column was ignored.

    ``v`` may be a ``Cell``, a dict of values, or a ``TemporaryDict``;
    containers are walked recursively. Any other type raises
    ``FlattenToolValueError`` (now with a descriptive message, instead of
    being raised empty).
    """
    if isinstance(v, Cell):
        warn(
            "Column {} has been ignored, {}".format(v.cell_location[3], extra_message),
            DataErrorWarning,
        )
    elif isinstance(v, dict):
        for x in v.values():
            warnings_for_ignored_columns(x, extra_message)
    elif isinstance(v, TemporaryDict):
        for x in v.to_list():
            warnings_for_ignored_columns(x, extra_message)
    else:
        raise FlattenToolValueError(
            "Unexpected value type in the unflattened cell tree: {}".format(type(v))
        )
179

180

181
def merge(base, mergee, debug_info=None):
    """
    Recursively merge the unflattened row ``mergee`` into ``base``.

    ``TemporaryDict`` values are merged key-by-key, dicts are merged
    recursively, and conflicting scalar ``Cell`` values trigger a
    duplicate-identifier ``DataErrorWarning`` (identical values are
    recorded as ``sub_cells``). ``debug_info`` carries sheet/id context
    used only for warning messages.

    Change from the original: a computed-then-discarded ``id_info`` in the
    "another column treats it as an array or object" branch (dead code)
    has been removed; behaviour is otherwise identical.
    """
    if not debug_info:
        debug_info = {}
    for key, v in mergee.items():
        if isinstance(v, Cell):
            value = v.cell_value
        else:
            value = v
        if key in base:
            if isinstance(value, TemporaryDict):
                if not isinstance(base[key], TemporaryDict):
                    warnings_for_ignored_columns(
                        v,
                        _(
                            "because it treats {} as an array, but another column does not"
                        ).format(key),
                    )
                    continue
                for temporarydict_key, temporarydict_value in value.items():
                    if temporarydict_key in base[key]:
                        merge(
                            base[key][temporarydict_key],
                            temporarydict_value,
                            debug_info,
                        )
                    else:
                        assert temporarydict_key not in base[key], _(
                            "Overwriting cell {} by mistake"
                        ).format(temporarydict_value)
                        base[key][temporarydict_key] = temporarydict_value
                for temporarydict_value in value.items_no_keyfield:
                    base[key].items_no_keyfield.append(temporarydict_value)
            elif isinstance(value, dict):
                if isinstance(base[key], dict):
                    merge(base[key], value, debug_info)
                else:
                    warnings_for_ignored_columns(
                        v,
                        _(
                            "because it treats {} as an object, but another column does not"
                        ).format(key),
                    )
            else:
                if not isinstance(base[key], Cell):
                    warnings_for_ignored_columns(
                        v, _("because another column treats it as an array or object")
                    )
                    continue
                base_value = base[key].cell_value
                if base_value != value:
                    # Build a human-readable identifier description for the
                    # duplicate-identifier warning below.
                    id_info = '{} "{}"'.format(
                        debug_info.get("id_name"),
                        debug_info.get(debug_info.get("id_name")),
                    )
                    if debug_info.get("root_id"):
                        id_info = (
                            '{} "{}", '.format(
                                debug_info.get("root_id"),
                                debug_info.get("root_id_or_none"),
                            )
                            + id_info
                        )
                    warn(
                        _(
                            'You may have a duplicate Identifier: We couldn\'t merge these rows with the {}: field "{}" in sheet "{}": one cell has the value: "{}", the other cell has the value: "{}"'
                        ).format(
                            id_info,
                            key,
                            debug_info.get("sheet_name"),
                            base_value,
                            value,
                        ),
                        DataErrorWarning,
                    )
                else:
                    # Same value from two cells: remember the extra source
                    # location for source maps.
                    base[key].sub_cells.append(v)
        else:
            # This happens when a parent record finds the first a child record of a known type
            base[key] = v

273

274
class SpreadsheetInput(object):
    """
    Base class describing a spreadsheet input. Has stubs which are
    implemented via inheritance for particular types of spreadsheet (e.g. xlsx
    or csv).

    """

    def convert_dict_titles(self, dicts, title_lookup=None):
        """
        Replace titles with field names in the given list of dictionaries
        (``dicts``) using the titles lookup in the schema parser.

        """
        # When a schema parser is attached its title_lookup always wins over
        # the caller-supplied one.
        if self.parser:
            title_lookup = self.parser.title_lookup
        for d in dicts:
            if title_lookup:
                yield OrderedDict(
                    [(title_lookup.lookup_header(k), v) for k, v in d.items()]
                )
            else:
                yield d

    # NOTE(review): the list/dict defaults below are mutable default
    # arguments. They appear to only ever be read here, but None sentinels
    # would be safer — confirm before changing, as subclasses share this
    # signature.
    def __init__(
        self,
        input_name="",
        root_list_path="main",
        root_is_list=False,
        timezone_name="UTC",
        root_id="ocid",
        convert_titles=False,
        vertical_orientation=False,
        include_sheets=[],
        exclude_sheets=[],
        id_name="id",
        xml=False,
        base_configuration={},
        use_configuration=True,
        convert_flags={},
    ):
        self.input_name = input_name
        self.root_list_path = root_list_path
        self.root_is_list = root_is_list
        # Populated by read_sheets() in subclasses.
        self.sub_sheet_names = []
        self.timezone = pytz.timezone(timezone_name)
        self.root_id = root_id
        self.convert_titles = convert_titles
        self.id_name = id_name
        self.xml = xml
        # Schema parser; may be set later by the caller.
        self.parser = None
        self.vertical_orientation = vertical_orientation
        self.include_sheets = include_sheets
        self.exclude_sheets = exclude_sheets
        self.base_configuration = base_configuration or {}
        # Per-sheet configuration, filled in by configure_sheets().
        self.sheet_configuration = {}
        self.use_configuration = use_configuration
        self.convert_flags = convert_flags

    def get_sub_sheets_lines(self):
        """Yield (sheet_name, lines) pairs, converting titles if requested."""
        for sub_sheet_name in self.sub_sheet_names:
            if self.convert_titles:
                yield sub_sheet_name, self.convert_dict_titles(
                    self.get_sheet_lines(sub_sheet_name),
                    self.parser.sub_sheets[sub_sheet_name].title_lookup
                    if sub_sheet_name in self.parser.sub_sheets
                    else None,
                )
            else:
                yield sub_sheet_name, self.get_sheet_lines(sub_sheet_name)

    def configure_sheets(self):
        """Parse and cache each sub-sheet's configuration row(s)."""
        for sub_sheet_name in self.sub_sheet_names:
            self.sheet_configuration[sub_sheet_name] = parse_sheet_configuration(
                self.get_sheet_configuration(sub_sheet_name)
            )

    def get_sheet_configuration(self, sheet_name):
        # Default: no configuration; subclasses override.
        return []

    def get_sheet_lines(self, sheet_name):
        # Subclasses must yield each row of the sheet as a dict.
        raise NotImplementedError

    def get_sheet_headings(self, sheet_name):
        # Subclasses must return the list of column headings.
        raise NotImplementedError

    def read_sheets(self):
        # Subclasses must populate sub_sheet_names / sheet_names_map.
        raise NotImplementedError

    def do_unflatten(self):
        """
        Convert all sheets into a cell tree: a list of dicts whose leaves
        are ``Cell`` objects, with rows sharing an id merged together.
        """
        main_sheet_by_ocid = OrderedDict()
        sheets = list(self.get_sub_sheets_lines())
        for i, sheet in enumerate(sheets):
            sheet_name, lines = sheet
            try:
                actual_headings = self.get_sheet_headings(sheet_name)
                # If sheet is empty or too many lines have been skipped
                if not actual_headings:
                    continue
                found = OrderedDict()
                last_col = len(actual_headings)
                # We want to ignore data in earlier columns, so we look
                # through the data backwards
                # NOTE(review): this loop reuses the name `i` from the outer
                # sheet loop; harmless since the outer value is not read
                # afterwards, but worth renaming.
                for i, actual_heading in enumerate(reversed(actual_headings)):
                    if actual_heading is None:
                        continue
                    if actual_heading in found:
                        # Later duplicates record the real column index.
                        found[actual_heading].append((last_col - i) - 1)
                    else:
                        found[actual_heading] = [i]
                # Warn about each duplicated heading, phrasing the message
                # for one, two, or many ignored columns.
                for actual_heading in reversed(found):
                    if len(found[actual_heading]) > 1:
                        keeping = found[actual_heading][0]  # noqa
                        ignoring = found[actual_heading][1:]
                        ignoring.reverse()
                        if len(ignoring) >= 3:
                            warn(
                                (
                                    _(
                                        'Duplicate heading "{}" found, ignoring '
                                        'the data in columns {} and {} (sheet: "{}").'
                                    )
                                ).format(
                                    actual_heading,
                                    ", ".join(
                                        [
                                            get_column_letter(x + 1)
                                            for x in ignoring[:-1]
                                        ]
                                    ),
                                    get_column_letter(ignoring[-1] + 1),
                                    sheet_name,
                                ),
                                DataErrorWarning,
                            )
                        elif len(found[actual_heading]) == 3:
                            # Exactly two ignored columns.
                            warn(
                                (
                                    _(
                                        'Duplicate heading "{}" found, ignoring '
                                        'the data in columns {} and {} (sheet: "{}").'
                                    )
                                ).format(
                                    actual_heading,
                                    get_column_letter(ignoring[0] + 1),
                                    get_column_letter(ignoring[1] + 1),
                                    sheet_name,
                                ),
                                DataErrorWarning,
                            )
                        else:
                            # A single ignored column.
                            warn(
                                (
                                    _(
                                        'Duplicate heading "{}" found, ignoring '
                                        'the data in column {} (sheet: "{}").'
                                    )
                                ).format(
                                    actual_heading,
                                    get_column_letter(ignoring[0] + 1),
                                    sheet_name,
                                ),
                                DataErrorWarning,
                            )
            except NotImplementedError:
                # The ListInput type used in the tests doesn't support getting headings.
                actual_headings = None
            for j, line in enumerate(lines):
                # Skip entirely blank rows.
                if all(x is None or x == "" for x in line.values()):
                    # if all(x == '' for x in line.values()):
                    continue
                root_id_or_none = line.get(self.root_id) if self.root_id else None
                cells = OrderedDict()
                for k, header in enumerate(line):
                    heading = actual_headings[k] if actual_headings else header
                    if self.vertical_orientation:
                        # This is misleading as it specifies the row number as the distance vertically
                        # and the horizontal 'letter' as a number.
                        # https://github.com/OpenDataServices/flatten-tool/issues/153
                        cells[header] = Cell(
                            line[header], (sheet_name, str(k + 1), j + 2, heading)
                        )
                    else:
                        cells[header] = Cell(
                            line[header],
                            (sheet_name, get_column_letter(k + 1), j + 2, heading),
                        )
                unflattened = unflatten_main_with_parser(
                    self.parser,
                    cells,
                    self.timezone,
                    self.xml,
                    self.id_name,
                    self.convert_flags,
                )
                if root_id_or_none not in main_sheet_by_ocid:
                    main_sheet_by_ocid[root_id_or_none] = TemporaryDict(
                        self.id_name, xml=self.xml
                    )

                # Extract the row's id value, handling the XML-tag case
                # (id stored under "text()") versus JSON / XML attribute.
                def inthere(unflattened, id_name):
                    if self.xml and not isinstance(unflattened.get(self.id_name), Cell):
                        # For an XML tag
                        return unflattened[id_name]["text()"].cell_value
                    else:
                        # For a JSON, or an XML attribute
                        return unflattened[id_name].cell_value

                if (
                    self.id_name in unflattened
                    and inthere(unflattened, self.id_name)
                    in main_sheet_by_ocid[root_id_or_none]
                ):
                    if self.xml and not isinstance(unflattened.get(self.id_name), Cell):
                        unflattened_id = unflattened.get(self.id_name)[
                            "text()"
                        ].cell_value
                    else:
                        unflattened_id = unflattened.get(self.id_name).cell_value
                    merge(
                        main_sheet_by_ocid[root_id_or_none][unflattened_id],
                        unflattened,
                        {
                            "sheet_name": sheet_name,
                            "root_id": self.root_id,
                            "root_id_or_none": root_id_or_none,
                            "id_name": self.id_name,
                            self.id_name: unflattened_id,
                        },
                    )
                else:
                    main_sheet_by_ocid[root_id_or_none].append(unflattened)
        temporarydicts_to_lists(main_sheet_by_ocid)
        # Concatenate the per-root-id lists into a single list.
        return sum(main_sheet_by_ocid.values(), [])

    def unflatten(self):
        """Return the unflattened data with Cell wrappers stripped."""
        result = self.do_unflatten()
        result = extract_list_to_value(result)
        return result

    def fancy_unflatten(self, with_cell_source_map, with_heading_source_map):
        """
        Like unflatten(), but optionally also build source maps tracing
        each output value back to its spreadsheet cell(s)/heading(s).

        Returns ``(result, ordered_cell_source_map, heading_source_map)``;
        a map is ``None`` when not requested.
        """
        cell_tree = self.do_unflatten()
        result = extract_list_to_value(cell_tree)
        ordered_cell_source_map = None
        heading_source_map = None
        if with_cell_source_map or with_heading_source_map:
            cell_source_map = extract_list_to_error_path(
                [] if self.root_is_list else [self.root_list_path], cell_tree
            )
            ordered_items = sorted(cell_source_map.items())
            row_source_map = OrderedDict()
            heading_source_map = OrderedDict()
            for path, _unused in ordered_items:
                cells = cell_source_map[path]
                # Prepare row_source_map key
                key = "/".join(str(x) for x in path[:-1])
                if not key in row_source_map:
                    row_source_map[key] = []
                if with_heading_source_map:
                    # Prepare header_source_map key
                    # Drop numeric path parts so only field names remain.
                    header_path_parts = []
                    for x in path:
                        try:
                            int(x)
                        # NOTE(review): bare except — presumably intended to
                        # catch ValueError from int(x); as written it also
                        # swallows KeyboardInterrupt etc. Confirm and narrow.
                        except:
                            header_path_parts.append(x)
                    header_path = "/".join(header_path_parts)
                    if header_path not in heading_source_map:
                        heading_source_map[header_path] = []
                # Populate the row and header source maps
                for cell in cells:
                    sheet, col, row, header = cell
                    if (sheet, row) not in row_source_map[key]:
                        row_source_map[key].append((sheet, row))
                    if with_heading_source_map:
                        if (sheet, header) not in heading_source_map[header_path]:
                            heading_source_map[header_path].append((sheet, header))
        if with_cell_source_map:
            ordered_cell_source_map = OrderedDict(
                ("/".join(str(x) for x in path), location)
                for path, location in ordered_items
            )
            for key in row_source_map:
                assert key not in ordered_cell_source_map, _(
                    "Row/cell collision: {}"
                ).format(key)
                ordered_cell_source_map[key] = row_source_map[key]
        return result, ordered_cell_source_map, heading_source_map
562

563

564
def extract_list_to_error_path(path, input):
    """
    Flatten a list of cell-tree dicts into a single mapping from
    error-path tuples (``path`` extended with list indices and keys) to
    cell locations. Asserts that no path occurs twice.
    """
    output = {}
    for index, element in enumerate(input):
        extracted = extract_dict_to_error_path(path + [index], element)
        for p, locations in extracted.items():
            assert p not in output, _("Already have key {}").format(p)
            output[p] = locations
    return output
572

573

574
def extract_dict_to_error_path(path, input):
    """
    Walk one cell-tree dict, producing a mapping from error-path tuples to
    lists of cell locations. Lists and dicts recurse; ``Cell`` leaves
    contribute their location plus any sub-cell locations (all sub-cells
    must share the leaf's value). Any other leaf type raises
    ``FlattenToolError``.
    """
    output = {}
    for k in input:
        node = input[k]
        if isinstance(node, list):
            nested = extract_list_to_error_path(path + [k], node)
            for p in nested:
                assert p not in output, _("Already have key {}").format(p)
                output[p] = nested[p]
        elif isinstance(node, dict):
            nested = extract_dict_to_error_path(path + [k], node)
            for p in nested:
                assert p not in output, _("Already have key {}").format(p)
                output[p] = nested[p]
        elif isinstance(node, Cell):
            p = tuple(path + [k])
            assert p not in output, _("Already have key {}").format(p)
            output[p] = [node.cell_location]
            for sub_cell in node.sub_cells:
                assert sub_cell.cell_value == node.cell_value, _(
                    "Two sub-cells have different values: {}, {}"
                ).format(node.cell_value, sub_cell.cell_value)
                output[p].append(sub_cell.cell_location)
        else:
            raise FlattenToolError(
                _("Unexpected result type in the JSON cell tree: {}").format(input[k])
            )
    return output
601

602

603
def extract_list_to_value(input):
    """Strip Cell wrappers from every dict in *input*, returning plain values."""
    return [extract_dict_to_value(element) for element in input]
608

609

610
def extract_dict_to_value(input):
    """
    Recursively convert one cell-tree dict into an OrderedDict of plain
    values: lists and dicts recurse, ``Cell`` leaves yield their value,
    and anything else raises ``FlattenToolError``.
    """
    output = OrderedDict()
    for key in input:
        node = input[key]
        if isinstance(node, list):
            output[key] = extract_list_to_value(node)
        elif isinstance(node, dict):
            output[key] = extract_dict_to_value(node)
        elif isinstance(node, Cell):
            output[key] = node.cell_value
        else:
            raise FlattenToolError(
                _("Unexpected result type in the JSON cell tree: {}").format(input[key])
            )
    return output
624

625

626
class CSVInput(SpreadsheetInput):
    """
    Reads a directory of CSV files, one file per (sub-)sheet; the file name
    without the ``.csv`` extension is the sheet name.
    """

    # Encoding used to open every CSV file.
    encoding = "utf-8"

    def get_sheet_headings(self, sheet_name):
        """Return the heading row of the sheet, honouring skipRows/ignore."""
        sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
        # A per-sheet configuration occupies the first row of the file.
        configuration_line = 1 if sheet_configuration else 0
        if not sheet_configuration:
            sheet_configuration = self.base_configuration
        if not self.use_configuration:
            sheet_configuration = {}
        skip_rows = sheet_configuration.get("skipRows", 0)
        if sheet_configuration.get("ignore"):
            # returning empty headers is a proxy for no data in the sheet.
            return []

        with open(
            os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding
        ) as main_sheet_file:
            r = csvreader(NullCharacterFilter(main_sheet_file))
            # The heading row is the first row after any configuration row
            # and skipped rows. Returns None implicitly if the file is
            # shorter than that.
            for num, row in enumerate(r):
                if num == (skip_rows + configuration_line):
                    return row

    def read_sheets(self):
        """Discover .csv files, apply include/exclude filters, configure."""
        sheet_file_names = os.listdir(self.input_name)
        sheet_names = sorted(
            [fname[:-4] for fname in sheet_file_names if fname.endswith(".csv")]
        )
        if self.include_sheets:
            for sheet in list(sheet_names):
                if sheet not in self.include_sheets:
                    sheet_names.remove(sheet)
        for sheet in list(self.exclude_sheets) or []:
            try:
                sheet_names.remove(sheet)
            except ValueError:
                # Excluded sheet doesn't exist; nothing to do.
                pass
        self.sub_sheet_names = sheet_names
        self.sheet_names_map = OrderedDict(
            (sheet_name, sheet_name) for sheet_name in sheet_names
        )
        self.configure_sheets()

    def generate_rows(self, dictreader, sheet_name):
        """Yield data rows as OrderedDicts, skipping config/header rows."""
        sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
        configuration_line = 1 if sheet_configuration else 0
        if not sheet_configuration:
            sheet_configuration = self.base_configuration
        if not self.use_configuration:
            sheet_configuration = {}

        skip_rows = sheet_configuration.get("skipRows", 0)
        header_rows = sheet_configuration.get("headerRows", 1)
        # Consume configuration and skipped rows via the underlying reader
        # so the DictReader's fieldnames come from the right row.
        for i in range(0, configuration_line + skip_rows):
            previous_row = next(dictreader.reader)  # noqa
        fieldnames = dictreader.fieldnames
        # Consume any extra header rows beyond the first.
        for i in range(0, header_rows - 1):
            next(dictreader.reader)
        for line in dictreader:
            yield OrderedDict((fieldname, line[fieldname]) for fieldname in fieldnames)

    def get_sheet_configuration(self, sheet_name):
        """Return config tokens from a leading row starting with '#', else []."""
        with open(
            os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding
        ) as main_sheet_file:
            r = csvreader(NullCharacterFilter(main_sheet_file))
            heading_row = next(r)
        if len(heading_row) > 0 and heading_row[0] == "#":
            return heading_row[1:]
        return []

    def get_sheet_lines(self, sheet_name):
        """Yield each data row of the named sheet as an OrderedDict."""
        # Pass the encoding to the open function
        with open(
            os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding
        ) as main_sheet_file:
            dictreader = DictReader(main_sheet_file)
            for row in self.generate_rows(dictreader, sheet_name):
                yield row
705

706

707
class BadXLSXZipFile(BadZipFile, FlattenToolError):
    """Raised when a file with an .xlsx extension is not a valid XLSX (ZIP) archive."""
class XLSXInput(SpreadsheetInput):
    """SpreadsheetInput implementation that reads .xlsx workbooks via openpyxl."""

    def read_sheets(self):
        """
        Open the workbook, record its sheet names (applying the
        include_sheets/exclude_sheets filters), then read per-sheet
        configuration via configure_sheets().

        Raises:
            BadXLSXZipFile: if the file is not a valid XLSX (ZIP) archive.
        """
        try:
            # data_only=True returns cached cell values instead of formulas.
            self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
        except BadZipFile as e:  # noqa
            # TODO when we have python3 only add 'from e' to show exception chain
            raise BadXLSXZipFile(
                _("The supplied file has extension .xlsx but isn't an XLSX file.")
            )

        self.sheet_names_map = OrderedDict(
            (sheet_name, sheet_name) for sheet_name in self.workbook.sheetnames
        )
        if self.include_sheets:
            # Keep only the explicitly requested sheets.
            for sheet in list(self.sheet_names_map):
                if sheet not in self.include_sheets:
                    self.sheet_names_map.pop(sheet)
        for sheet in self.exclude_sheets or []:
            self.sheet_names_map.pop(sheet, None)

        sheet_names = list(sheet for sheet in self.sheet_names_map.keys())
        self.sub_sheet_names = sheet_names
        self.configure_sheets()

    def get_sheet_headings(self, sheet_name):
        """
        Return the heading cell values for a sheet, accounting for skipRows
        and any leading "#" config row. An empty list is used as a proxy for
        "no data in this sheet" (ignored or "#"-commented sheets).
        """
        worksheet = self.workbook[self.sheet_names_map[sheet_name]]
        sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
        # A non-empty per-sheet configuration means row 1 was a "#" config row.
        configuration_line = 1 if sheet_configuration else 0
        if not sheet_configuration:
            sheet_configuration = self.base_configuration
        if not self.use_configuration:
            sheet_configuration = {}

        skip_rows = sheet_configuration.get("skipRows", 0)
        if sheet_configuration.get("ignore") or (
            sheet_configuration.get("hashcomments") and sheet_name.startswith("#")
        ):
            # returning empty headers is a proxy for no data in the sheet.
            return []

        if self.vertical_orientation:
            # Headings run down the first non-skipped column rather than
            # across the first non-skipped row.
            return [
                cell.value
                for cell in worksheet[get_column_letter(skip_rows + 1)][
                    configuration_line:
                ]
            ]

        try:
            return [
                cell.value for cell in worksheet[skip_rows + configuration_line + 1]
            ]
        except IndexError:
            # The computed heading row lies beyond the sheet's contents
            # (e.g. skipRows larger than the sheet), so there are no headings.
            return []

    def get_sheet_configuration(self, sheet_name):
        """
        Return the configuration values from the sheet's first row when cell
        A1 contains "#"; otherwise an empty list.
        """
        worksheet = self.workbook[self.sheet_names_map[sheet_name]]
        if worksheet["A1"].value == "#":
            # Collect the non-empty cells that follow the "#" marker.
            return [
                cell.value
                for num, cell in enumerate(worksheet[1])
                if num != 0 and cell.value
            ]
        else:
            return []

    def get_sheet_lines(self, sheet_name):
        """
        Yield each data row of the sheet as an OrderedDict mapping heading ->
        cell value, honouring skipRows/headerRows and vertical orientation.
        Cells under an empty heading, or under a "#"-prefixed heading when
        hashcomments is set, are blanked to None so they are ignored later.
        """
        sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
        # A non-empty per-sheet configuration means row 1 was a "#" config row.
        configuration_line = 1 if sheet_configuration else 0
        if not sheet_configuration:
            sheet_configuration = self.base_configuration
        if not self.use_configuration:
            sheet_configuration = {}

        skip_rows = sheet_configuration.get("skipRows", 0)
        header_rows = sheet_configuration.get("headerRows", 1)

        worksheet = self.workbook[self.sheet_names_map[sheet_name]]
        if self.vertical_orientation:
            # Vertical sheets: headings are a column, data rows are columns.
            header_row = worksheet[get_column_letter(skip_rows + 1)]
            remaining_rows = worksheet.iter_cols(min_col=skip_rows + header_rows + 1)
            if configuration_line:
                # Skip the config cell at the top of each column.
                header_row = header_row[1:]
                remaining_rows = worksheet.iter_cols(
                    min_col=skip_rows + header_rows + 1, min_row=2
                )
        else:
            header_row = worksheet[skip_rows + configuration_line + 1]
            remaining_rows = worksheet.iter_rows(
                min_row=skip_rows + configuration_line + header_rows + 1
            )

        # Map column index -> heading value, so data cells can be labelled.
        coli_to_header = {}
        for i, header in enumerate(header_row):
            coli_to_header[i] = header.value

        for row in remaining_rows:
            output_row = OrderedDict()
            for i, x in enumerate(row):
                header = coli_to_header[i]
                value = x.value
                if not header:
                    # None means that the cell will be ignored
                    value = None
                elif (
                    sheet_configuration.get("hashcomments")
                    and isinstance(header, str)
                    and header.startswith("#")
                ):
                    # None means that the cell will be ignored
                    value = None
                output_row[header] = value
            yield output_row
class ODSInput(SpreadsheetInput):
    """
    SpreadsheetInput implementation that reads .ods files via ODSReader.
    Worksheets here are the structures stored in ODSReader.SHEETS, indexed
    like row-lists of cell values (see the subscripting below).
    """

    def read_sheets(self):
        """
        Open the ODS file and record its sheets (applying the
        include_sheets/exclude_sheets filters), then read per-sheet
        configuration via configure_sheets().
        """
        self.workbook = ODSReader(self.input_name)
        self.sheet_names_map = self.workbook.SHEETS

        if self.include_sheets:
            # Keep only the explicitly requested sheets.
            for sheet in list(self.sheet_names_map):
                if sheet not in self.include_sheets:
                    self.sheet_names_map.pop(sheet)

        for sheet in self.exclude_sheets or []:
            self.sheet_names_map.pop(sheet, None)

        self.sub_sheet_names = self.sheet_names_map.keys()
        self.configure_sheets()

    def _resolve_sheet_configuration(self, sheet_name):
        """
        Return the effective configuration dict for a sheet, falling back to
        the base configuration (tagged with a "base_configuration" marker so
        callers can tell no per-sheet config row exists). When configuration
        is disabled, a bare {"unused_config_line": True} marker records that
        a config row is present but unused.

        NOTE(review): the fallback writes the "base_configuration" key
        directly into the shared self.base_configuration dict (no copy is
        made), so that marker persists across sheets.
        """
        sheet_configuration = self.sheet_configuration[sheet_name]
        if not self.use_configuration:
            return {"unused_config_line": True} if sheet_configuration else {}
        if not sheet_configuration:
            sheet_configuration = self.base_configuration
            sheet_configuration["base_configuration"] = True

        return sheet_configuration

    def get_sheet_headings(self, sheet_name):
        """
        Return the heading values for a sheet, accounting for skipRows and
        any leading "#" config row. An empty list is used as a proxy for
        "no data in this sheet" (ignored or "#"-commented sheets).
        """
        worksheet = self.sheet_names_map[sheet_name]

        sheet_configuration = self._resolve_sheet_configuration(sheet_name)
        # Only a genuine per-sheet config row occupies a line in the sheet;
        # the base-configuration fallback does not.
        configuration_line = (
            1
            if sheet_configuration and "base_configuration" not in sheet_configuration
            else 0
        )

        skip_rows = sheet_configuration.get("skipRows", 0)
        if sheet_configuration.get("ignore") or (
            sheet_configuration.get("hashcomments") and sheet_name.startswith("#")
        ):
            # returning empty headers is a proxy for no data in the sheet.
            return []

        if self.vertical_orientation:
            # Headings run down the first non-skipped column; rows shorter
            # than skip_rows have no cell in that column.
            return [
                row[skip_rows]
                for row in worksheet[configuration_line:]
                if len(row) > skip_rows
            ]

        try:
            return [cell for cell in worksheet[skip_rows + configuration_line]]
        except IndexError:
            # The computed heading row lies beyond the sheet's contents
            # (e.g. skipRows larger than the sheet), so there are no headings.
            return []

    def get_sheet_configuration(self, sheet_name):
        # See if there are config properties in the spreadsheet
        # https://flatten-tool.readthedocs.io/en/latest/unflatten/#configuration-properties-skip-and-header-rows
        worksheet = self.sheet_names_map[sheet_name]

        try:
            # cell A1
            if worksheet[0][0] == "#":
                return worksheet[0]

        except IndexError:
            # Empty sheet / empty first row: no configuration present.
            pass

        return []

    def get_sheet_lines(self, sheet_name):
        # This generator should yield an ordered dict in the format
        # see examples/simple/
        # yield OrderedDict([('a/b', '1'), ('a/c', '2'), ('d', '3')])
        # yield OrderedDict([('a/b', '4'), ('a/c', '5'), ('d', '6')])

        sheet_configuration = self._resolve_sheet_configuration(sheet_name)
        # Only a genuine per-sheet config row occupies a line in the sheet.
        configuration_line = (
            1
            if sheet_configuration and "base_configuration" not in sheet_configuration
            else 0
        )

        skip_rows = sheet_configuration.get("skipRows", 0)
        header_rows = sheet_configuration.get("headerRows", 1)

        worksheet = self.sheet_names_map[sheet_name]
        if self.vertical_orientation:
            # Vertical sheets: headings are a column, data "rows" are the
            # remaining columns, so the sheet is transposed by hand here.
            header_row = [
                row[skip_rows]
                for row in worksheet[configuration_line:]
                if len(row) > skip_rows
            ]
            longest_horizontal_row = max(
                len(row) for row in worksheet[configuration_line:]
            )
            remaining_rows = [
                [
                    row[i] if len(row) > i else None
                    for row in worksheet[configuration_line:]
                    if row
                ]
                for i in range(1, longest_horizontal_row)
            ]
        else:
            header_row = worksheet[skip_rows + configuration_line]
            remaining_rows = worksheet[(skip_rows + configuration_line + header_rows) :]

        # Map column index -> heading value, so data cells can be labelled.
        coli_to_header = {}
        for i, header in enumerate(header_row):
            coli_to_header[i] = header

        for row in remaining_rows:
            output_row = OrderedDict()
            for i, x in enumerate(row):

                try:
                    header = coli_to_header[i]
                except KeyError:
                    # Cell beyond the last heading: ignore it entirely.
                    continue
                value = x
                if not header:
                    # None means that the cell will be ignored
                    value = None
                elif sheet_configuration.get("hashcomments") and header.startswith("#"):
                    # None means that the cell will be ignored
                    value = None
                output_row[header] = value
            if output_row:
                # Suppress rows where every surviving cell is blank.
                if not all(value is None for value in output_row.values()):
                    yield output_row
# Maps a supported spreadsheet file extension to the input class that reads it.
FORMATS = {"xlsx": XLSXInput, "csv": CSVInput, "ods": ODSInput}
class ListAsDict(dict):
    """
    Marker subclass of dict used while unflattening to stand in for a JSON
    array: keys are the integer list indices encountered so far. Instances
    are converted into real list-like structures later (see
    list_as_dicts_to_temporary_dicts).
    """
def list_as_dicts_to_temporary_dicts(unflattened, id_name, xml):
    """
    Recursively convert, in place, every ListAsDict inside `unflattened` into
    a TemporaryDict keyed by `id_name`, appending its items in index order.
    Cell values are left untouched and empty mappings are removed. Returns
    the (mutated) input for convenience.
    """
    # Snapshot the items, since keys may be popped or replaced while looping.
    for key, value in list(unflattened.items()):
        if isinstance(value, Cell):
            continue
        if hasattr(value, "items"):
            if not value:
                # Drop empty mappings from the output.
                unflattened.pop(key)
            # Recurse first so nested ListAsDicts are converted bottom-up.
            list_as_dicts_to_temporary_dicts(value, id_name, xml)
        # Note: a ListAsDict is also a dict, so this runs *after* the
        # recursion above for the same value.
        if isinstance(value, ListAsDict):
            temporarydict = TemporaryDict(id_name, xml=xml)
            # Append in ascending index order so output order matches input.
            for index in sorted(value.keys()):
                temporarydict.append(value[index])
            unflattened[key] = temporarydict
    return unflattened
def unflatten_main_with_parser(parser, line, timezone, xml, id_name, convert_flags={}):
    """
    Convert one flattened spreadsheet row (`line`: a mapping of '/'-separated
    path strings to Cell objects) into a nested OrderedDict structure.

    Arrays are accumulated as ListAsDict placeholders (index -> item) and
    converted to TemporaryDicts keyed by `id_name` at the end. When `parser`
    is given, parser.flattened supplies the schema type for each path. When
    `xml` is true, element text is stored under a "text()" key and
    "@"-prefixed path items are treated as attributes.

    Structural conflicts between columns produce DataErrorWarning warnings
    (the offending column is ignored); conflicts with the schema raise
    FlattenToolValueError.

    NOTE(review): `convert_flags={}` is a mutable default argument; it is
    only passed through to convert_type here, but confirm convert_type does
    not mutate it before relying on that.
    """
    unflattened = OrderedDict()
    for path, cell in line.items():
        # Skip blank cells
        if cell.cell_value is None or cell.cell_value == "":
            continue
        # Walk down the path one component at a time, creating intermediate
        # containers as needed; current_path tracks the container being built.
        current_path = unflattened
        path_list = [item.rstrip("[]") for item in str(path).split("/")]
        for num, path_item in enumerate(path_list):
            if isint(path_item):
                # Numeric components are array indices, handled one step
                # early (as next_path_item) below; a leading number means the
                # whole column heading is just a number, which is ignored.
                if num == 0:
                    warn(
                        _(
                            'Column "{}" has been ignored because it is a number.'
                        ).format(path),
                        DataErrorWarning,
                    )
                continue
            current_type = None
            # The schema path excludes the numeric index components.
            path_till_now = "/".join(
                [item for item in path_list[: num + 1] if not isint(item)]
            )
            if parser:
                current_type = parser.flattened.get(path_till_now)
            try:
                next_path_item = path_list[num + 1]
            except IndexError:
                next_path_item = ""

            # Quick solution to avoid casting of date as datetime in spreadsheet > xml
            if xml:
                if type(cell.cell_value) == datetime.datetime and not next_path_item:
                    if "datetime" not in str(path):
                        current_type = "date"

            ## Array
            list_index = -1
            if isint(next_path_item):
                if current_type and current_type != "array":
                    raise FlattenToolValueError(
                        _(
                            "There is an array at '{}' when the schema says there should be a '{}'"
                        ).format(path_till_now, current_type)
                    )
                list_index = int(next_path_item)
                current_type = "array"

            if current_type == "array":
                list_as_dict = current_path.get(path_item)
                if list_as_dict is None:
                    list_as_dict = ListAsDict()
                    current_path[path_item] = list_as_dict
                elif type(list_as_dict) is not ListAsDict:
                    warn(
                        _(
                            "Column {} has been ignored, because it treats {} as an array, but another column does not."
                        ).format(path, path_till_now),
                        DataErrorWarning,
                    )
                    break
                new_path = list_as_dict.get(list_index)
                if new_path is None:
                    new_path = OrderedDict()
                    list_as_dict[list_index] = new_path
                current_path = new_path
                if not xml or num < len(path_list) - 2:
                    # In xml "arrays" can have text values, if they're the final element
                    # This corresponds to a tag with text, but also possibly attributes
                    continue

            ## Object
            if current_type == "object" or (not current_type and next_path_item):
                new_path = current_path.get(path_item)
                if new_path is None:
                    new_path = OrderedDict()
                    current_path[path_item] = new_path
                elif type(new_path) is ListAsDict or not hasattr(new_path, "items"):
                    warn(
                        _(
                            "Column {} has been ignored, because it treats {} as an object, but another column does not."
                        ).format(path, path_till_now),
                        DataErrorWarning,
                    )
                    break
                current_path = new_path
                continue
            if (
                current_type
                and current_type not in ["object", "array"]
                and next_path_item
            ):
                raise FlattenToolValueError(
                    _(
                        "There is an object or list at '{}' but it should be an {}"
                    ).format(path_till_now, current_type)
                )

            ## Other Types
            current_path_value = current_path.get(path_item)
            if not xml and (
                type(current_path_value) is ListAsDict
                or hasattr(current_path_value, "items")
            ):
                #   ^
                # xml can have an object/array that also has a text value
                warn(
                    _(
                        "Column {} has been ignored, because another column treats it as an array or object"
                    ).format(path_till_now),
                    DataErrorWarning,
                )
                continue

            value = cell.cell_value
            if xml and current_type == "array":
                # In xml "arrays" can have text values, if they're the final element
                # However the type of the text value itself should not be "array",
                # as that would split the text on commas, which we don't want.
                # https://github.com/OpenDataServices/cove/issues/1030
                converted_value = convert_type("", value, timezone, convert_flags)
            else:
                converted_value = convert_type(
                    current_type or "", value, timezone, convert_flags
                )
            cell.cell_value = converted_value
            if converted_value is not None and converted_value != "":
                if xml:
                    # For XML we want to support text and attributes at the
                    # same level, e.g.
                    # <my-element a="b">some text</my-element>
                    # which we represent in a dict as:
                    # {"@a":"b", "text()": "some text"}
                    # To ensure we can attach attributes everywhere, all
                    # element text must be added as a dict with a `text()` key.
                    if path_item.startswith("@"):
                        current_path[path_item] = cell
                    else:
                        if current_type == "array":
                            current_path["text()"] = cell
                        elif path_item not in current_path:
                            current_path[path_item] = {"text()": cell}
                        else:
                            current_path[path_item]["text()"] = cell
                else:
                    current_path[path_item] = cell

    unflattened = list_as_dicts_to_temporary_dicts(unflattened, id_name, xml)
    return unflattened
def path_search(
    nested_dict,
    path_list,
    id_fields=None,
    path=None,
    top=False,
    top_sheet=False,
    id_name=None,
    xml=False,
):
    """
    Walk down `nested_dict` following `path_list`, creating intermediate
    containers as needed, and return the innermost dict reached.

    Components ending in "[]" (and the first component when `top` is true)
    are treated as lists: a TemporaryDict is created and the entry matching
    `id_fields[path + "/id"]` is descended into. All other components are
    treated as plain nested dicts.

    Bug fix: `id_name` and `xml` used to be undefined free variables in the
    TemporaryDict-creation branch (previously silenced with `# noqa`), so
    reaching it raised NameError. They are now explicit keyword arguments
    with backward-compatible defaults, threaded through the recursion.
    """
    if not path_list:
        return nested_dict

    id_fields = id_fields or {}
    parent_field = path_list[0]
    path = parent_field if path is None else path + "/" + parent_field

    if parent_field.endswith("[]") or top:
        if parent_field.endswith("[]"):
            parent_field = parent_field[:-2]
        if parent_field not in nested_dict:
            nested_dict[parent_field] = TemporaryDict(
                keyfield=id_name, top_sheet=top_sheet, xml=xml
            )
        # Pick the list entry whose id matches this path, creating it if new.
        sub_sheet_id = id_fields.get(path + "/id")
        if sub_sheet_id not in nested_dict[parent_field]:
            nested_dict[parent_field][sub_sheet_id] = {}
        return path_search(
            nested_dict[parent_field][sub_sheet_id],
            path_list[1:],
            id_fields=id_fields,
            path=path,
            top_sheet=top_sheet,
            id_name=id_name,
            xml=xml,
        )
    else:
        if parent_field not in nested_dict:
            nested_dict[parent_field] = OrderedDict()
        return path_search(
            nested_dict[parent_field],
            path_list[1:],
            id_fields=id_fields,
            path=path,
            top_sheet=top_sheet,
            id_name=id_name,
            xml=xml,
        )
class TemporaryDict(UserDict):
    """
    Ordered mapping used while unflattening to collect list items keyed by an
    identifying field (`keyfield`), merging items that share the same key.
    Items lacking the key field are kept separately, in insertion order, and
    placed after the keyed items by to_list().
    """

    def __init__(self, keyfield, top_sheet=False, xml=False):
        # UserDict.__init__ is deliberately not called; data is set directly.
        self.data = OrderedDict()
        self.keyfield = keyfield
        self.items_no_keyfield = []
        self.top_sheet = top_sheet
        self.xml = xml

    def __repr__(self):
        return (
            f"TemporaryDict(keyfield={self.keyfield!r}, "
            f"items_no_keyfield={self.items_no_keyfield!r}, data={self.data!r})"
        )

    def append(self, item):
        """Add `item`, merging it into an existing entry when its key matches."""
        if self.keyfield not in item:
            self.items_no_keyfield.append(item)
            return
        raw = item[self.keyfield]
        if self.xml:
            if isinstance(raw, Cell):
                # Key supplied by an XML attribute.
                key = raw.cell_value
            else:
                # Key supplied by an XML tag's text content.
                text = raw["text()"]
                key = text.cell_value if isinstance(text, Cell) else text
        else:
            key = raw.cell_value if isinstance(raw, Cell) else raw
        if key in self.data:
            self.data[key].update(item)
        else:
            self.data[key] = item

    def to_list(self):
        """Return keyed items (in first-seen key order) then un-keyed items."""
        return [*self.data.values(), *self.items_no_keyfield]
def temporarydicts_to_lists(nested_dict):
12✔
1214
    """Recursively transforms TemporaryDicts to lists inplace."""
1215
    for key, value in nested_dict.items():
12✔
1216
        if isinstance(value, Cell):
12✔
1217
            continue
12✔
1218
        if hasattr(value, "to_list"):
12✔
1219
            temporarydicts_to_lists(value)
12✔
1220
            if hasattr(value, "items_no_keyfield"):
12✔
1221
                for x in value.items_no_keyfield:
12✔
1222
                    temporarydicts_to_lists(x)
12✔
1223
            nested_dict[key] = value.to_list()
12✔
1224
        elif hasattr(value, "items"):
12✔
1225
            temporarydicts_to_lists(value)
12✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc