• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenDataServices / flatten-tool / 6507626273

13 Oct 2023 11:25AM UTC coverage: 42.006% (-53.7%) from 95.72%
6507626273

Pull #433

github

odscjames
New "Geo" optional dependencies

https://github.com/OpenDataServices/flatten-tool/issues/424
Pull Request #433: New "Geo" optional dependencies

38 of 38 new or added lines in 6 files covered. (100.0%)

1466 of 3490 relevant lines covered (42.01%)

4.16 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.21
/flattentool/tests/test_input_SpreadsheetInput_unflatten.py
1
# -*- coding: utf-8 -*-
2
"""
10✔
3
Tests of unflatten method of the SpreadsheetInput class from input.py
4
This file only covers tests for the main sheet. Tests for multiple sheets are in test_input_SpreadsheetInput_unflatten_multiplesheets.py
5

6
"""
7
from __future__ import unicode_literals
10✔
8

9
import datetime
10✔
10
from collections import OrderedDict
10✔
11
from decimal import Decimal
10✔
12

13
import pytest
10✔
14

15
from flattentool.schema import SchemaParser
10✔
16

17
from .test_input_SpreadsheetInput import ListInput
10✔
18

19
ROOT_ID_TITLES = {"ocid": "Open Contracting ID", "custom": "Custom"}
10✔
20

21

22
def inject_root_id(root_id, d):
    """
    Return a copy of ``d`` with the root id placeholder keys substituted.

    The test data uses two placeholder keys: ``"ROOT_ID"`` and
    ``"ROOT_ID_TITLE"``. For each of them:

    * if ``root_id`` is the empty string, the entry is dropped;
    * otherwise ``"ROOT_ID"`` is renamed to ``root_id`` and
      ``"ROOT_ID_TITLE"`` is renamed to ``ROOT_ID_TITLES[root_id]``.

    All other entries are copied unchanged and in their original order.
    ``d`` itself is not modified. The result is built with ``type(d)()``,
    so an ``OrderedDict`` input gives an ``OrderedDict`` output.

    Raises ``KeyError`` if a ``"ROOT_ID_TITLE"`` entry is present and
    ``root_id`` is a non-empty value missing from ``ROOT_ID_TITLES``.
    """
    # Same mapping type as the input, so ordered inputs stay ordered.
    new_d = type(d)()
    for k, v in d.items():
        if k in ("ROOT_ID", "ROOT_ID_TITLE"):
            if root_id == "":
                # No root id in use for this test run: omit the placeholder.
                continue
            k = root_id if k == "ROOT_ID" else ROOT_ID_TITLES[root_id]
        new_d[k] = v
    return new_d
10✔
40

41

42
UNICODE_TEST_STRING = "éαГ😼𝒞人"
10✔
43
# ROOT_ID will be replaced by the appropriate root_id name in the test (e.g. ocid)
44

45
testdata = [
10✔
46
    (
47
        "Basic flat",
48
        [{"ROOT_ID": "1", "id": 2, "testA": 3}],
49
        [{"ROOT_ID": "1", "id": 2, "testA": 3}],
50
        [],
51
        True,
52
        True,
53
    ),
54
    (
55
        "Basic with float",
56
        # 3.0 is converted to 3
57
        # This is needed to handle google docs xlsx properly
58
        # https://github.com/OpenDataServices/cove/issues/838
59
        [{"ROOT_ID": "1", "id": 2, "testA": 3.0}],
60
        [{"ROOT_ID": "1", "id": 2, "testA": 3}],
61
        [],
62
        True,
63
        True,
64
    ),
65
    (
66
        "Basic with zero",
67
        [{"ROOT_ID": "1", "id": 2, "testA": 0}],
68
        [{"ROOT_ID": "1", "id": 2, "testA": 0}],
69
        [],
70
        True,
71
        True,
72
    ),
73
    (
74
        "Basic with date-time",
75
        [{"ROOT_ID": "1", "id": 2, "testDateTime": datetime.datetime(2020, 1, 1)}],
76
        [{"ROOT_ID": "1", "id": 2, "testDateTime": "2020-01-01T00:00:00+00:00"}],
77
        [],
78
        False,
79
        True,
80
    ),
81
    (
82
        "Basic with date",
83
        [{"ROOT_ID": "1", "id": 2, "testDate": datetime.datetime(2020, 1, 1)}],
84
        [{"ROOT_ID": "1", "id": 2, "testDate": "2020-01-01"}],
85
        [],
86
        False,
87
        False,
88
    ),
89
    (
90
        "Nested",
91
        [
92
            {
93
                "ROOT_ID": "1",
94
                "id": 2,
95
                "testO/testB": 3,
96
                "testO/testC": 4,
97
            }
98
        ],
99
        [{"ROOT_ID": "1", "id": 2, "testO": {"testB": 3, "testC": 4}}],
100
        [],
101
        True,
102
        True,
103
    ),
104
    (
105
        "Unicode",
106
        [{"ROOT_ID": UNICODE_TEST_STRING, "testU": UNICODE_TEST_STRING}],
107
        [{"ROOT_ID": UNICODE_TEST_STRING, "testU": UNICODE_TEST_STRING}],
108
        [],
109
        True,
110
        True,
111
    ),
112
    (
113
        "Single item array",
114
        [{"ROOT_ID": "1", "id": 2, "testL/0/id": 3, "testL/0/testB": 4}],
115
        [
116
            {
117
                "ROOT_ID": "1",
118
                "id": 2,
119
                "testL": [{"id": 3, "testB": 4}],
120
            }
121
        ],
122
        [],
123
        False,
124
        True,
125
    ),
126
    (
127
        "Single item array without parent ID",
128
        [
129
            {
130
                "ROOT_ID": "1",
131
                "testL/0/id": "2",
132
                "testL/0/testB": "3",
133
            }
134
        ],
135
        [
136
            {
137
                "ROOT_ID": "1",
138
                "testL": [{"id": "2", "testB": "3"}],
139
            }
140
        ],
141
        [],
142
        False,
143
        True,
144
    ),
145
    (
146
        "Empty",
147
        [
148
            {
149
                "ROOT_ID": "",
150
                "id": "",
151
                "testA": "",
152
                "testB": "",
153
                "testC": "",
154
                "testD": "",
155
                "testE": "",
156
            }
157
        ],
158
        [],
159
        [],
160
        False,
161
        True,
162
    ),
163
    (
164
        "Empty except for root id",
165
        [
166
            {
167
                "ROOT_ID": 1,
168
                "id": "",
169
                "testA": "",
170
                "testB": "",
171
                "testC": "",
172
                "testD": "",
173
                "testE": "",
174
            }
175
        ],
176
        [{"ROOT_ID": 1}],
177
        [],
178
        False,
179
        True,
180
    ),
181
    # Previously this caused the error: TypeError: unorderable types: str() < int()
182
    # Now one of the columns is ignored
183
    (
184
        "Mismatch of object/array for field not in schema",
185
        [
186
            OrderedDict(
187
                [
188
                    ("ROOT_ID", 1),
189
                    ("id", 2),
190
                    ("newtest/a", 3),
191
                    ("newtest/0/a", 4),
192
                ]
193
            )
194
        ],
195
        [
196
            {
197
                "ROOT_ID": 1,
198
                "id": 2,
199
                "newtest": {
200
                    "a": 3,
201
                },
202
            }
203
        ],
204
        [
205
            "Column newtest/0/a has been ignored, because it treats newtest as an array, but another column does not."
206
        ],
207
        False,
208
        True,
209
    ),
210
    # Previously this caused the error: TypeError: unorderable types: str() < int()
211
    # Now one of the columns is ignored
212
    (
213
        "Mismatch of array/object for field not in schema",
214
        [
215
            OrderedDict(
216
                [
217
                    ("ROOT_ID", 1),
218
                    ("id", 2),
219
                    ("newtest/0/a", 4),
220
                    ("newtest/a", 3),
221
                ]
222
            )
223
        ],
224
        [{"ROOT_ID": 1, "id": 2, "newtest": [{"a": 4}]}],
225
        [
226
            "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not."
227
        ],
228
        False,
229
        True,
230
    ),
231
    # Previously this caused the error: 'Cell' object has no attribute 'get'
232
    # Now one of the columns is ignored
233
    (
234
        "str / array mixing",
235
        [
236
            OrderedDict(
237
                [
238
                    ("ROOT_ID", 1),
239
                    ("id", 2),
240
                    ("newtest", 3),
241
                    ("newtest/0/a", 4),
242
                ]
243
            )
244
        ],
245
        [{"ROOT_ID": 1, "id": 2, "newtest": 3}],
246
        [
247
            "Column newtest/0/a has been ignored, because it treats newtest as an array, but another column does not."
248
        ],
249
        False,
250
        True,
251
    ),
252
    (
253
        "str / object mixing",
254
        [
255
            OrderedDict(
256
                [
257
                    ("ROOT_ID", 1),
258
                    ("id", 2),
259
                    ("newtest", 3),
260
                    ("newtest/a", 4),
261
                ]
262
            )
263
        ],
264
        [{"ROOT_ID": 1, "id": 2, "newtest": 3}],
265
        [
266
            "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not."
267
        ],
268
        False,
269
        True,
270
    ),
271
    (
272
        "array / str mixing",
273
        [
274
            OrderedDict(
275
                [
276
                    ("ROOT_ID", 1),
277
                    ("id", 2),
278
                    ("nest/newtest/0/a", 3),
279
                    ("nest/newtest", 4),
280
                ]
281
            )
282
        ],
283
        [{"ROOT_ID": 1, "id": 2, "nest": {"newtest": [{"a": 3}]}}],
284
        [
285
            "Column nest/newtest has been ignored, because another column treats it as an array or object"
286
        ],
287
        False,
288
        True,
289
    ),
290
    (
291
        "object / str mixing",
292
        [
293
            OrderedDict(
294
                [
295
                    ("ROOT_ID", 1),
296
                    ("id", 2),
297
                    ("newtest/a", 3),
298
                    ("newtest", 4),
299
                ]
300
            )
301
        ],
302
        [{"ROOT_ID": 1, "id": 2, "newtest": {"a": 3}}],
303
        [
304
            "Column newtest has been ignored, because another column treats it as an array or object"
305
        ],
306
        False,
307
        True,
308
    ),
309
    (
310
        "Mismatch of object/array for field not in schema (multiline)",
311
        [
312
            OrderedDict(
313
                [
314
                    ("ROOT_ID", 1),
315
                    ("id", 2),
316
                    ("nest/newtest/a", 3),
317
                ]
318
            ),
319
            OrderedDict(
320
                [
321
                    ("ROOT_ID", 1),
322
                    ("id", 2),
323
                    ("nest/newtest/0/a", 4),
324
                ]
325
            ),
326
        ],
327
        [
328
            {
329
                "ROOT_ID": 1,
330
                "id": 2,
331
                "nest": {
332
                    "newtest": {
333
                        "a": 3,
334
                    }
335
                },
336
            }
337
        ],
338
        [
339
            "Column nest/newtest/0/a has been ignored, because it treats newtest as an array, but another column does not"
340
        ],
341
        False,
342
        True,
343
    ),
344
    # Previously this caused the error: TypeError: unorderable types: str() < int()
345
    # Now one of the columns is ignored
346
    (
347
        "Mismatch of array/object for field not in schema (multiline)",
348
        [
349
            OrderedDict(
350
                [
351
                    ("ROOT_ID", 1),
352
                    ("id", 2),
353
                    ("newtest/0/a", 4),
354
                ]
355
            ),
356
            OrderedDict(
357
                [
358
                    ("ROOT_ID", 1),
359
                    ("id", 2),
360
                    ("newtest/a", 3),
361
                ]
362
            ),
363
        ],
364
        [{"ROOT_ID": 1, "id": 2, "newtest": [{"a": 4}]}],
365
        [
366
            "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not"
367
        ],
368
        False,
369
        True,
370
    ),
371
    # Previously this caused the error: 'Cell' object has no attribute 'get'
372
    # Now one of the columns is ignored
373
    (
374
        "str / array mixing multiline",
375
        [
376
            OrderedDict(
377
                [
378
                    ("ROOT_ID", 1),
379
                    ("id", 2),
380
                    ("nest/newtest", 3),
381
                ]
382
            ),
383
            OrderedDict(
384
                [
385
                    ("ROOT_ID", 1),
386
                    ("id", 2),
387
                    ("nest/newtest/0/a", 4),
388
                    ("nest/newtest/0/b", 5),
389
                ]
390
            ),
391
        ],
392
        [{"ROOT_ID": 1, "id": 2, "nest": {"newtest": 3}}],
393
        [
394
            "Column nest/newtest/0/a has been ignored, because it treats newtest as an array, but another column does not",
395
            "Column nest/newtest/0/b has been ignored, because it treats newtest as an array, but another column does not",
396
        ],
397
        False,
398
        True,
399
    ),
400
    (
401
        "array / str mixing multiline",
402
        # same as above, but with rows switched
403
        [
404
            OrderedDict(
405
                [
406
                    ("ROOT_ID", 1),
407
                    ("id", 2),
408
                    ("nest/newtest/0/a", 4),
409
                ]
410
            ),
411
            OrderedDict(
412
                [
413
                    ("ROOT_ID", 1),
414
                    ("id", 2),
415
                    ("nest/newtest", 3),
416
                ]
417
            ),
418
        ],
419
        [{"ROOT_ID": 1, "id": 2, "nest": {"newtest": [{"a": 4}]}}],
420
        [
421
            "Column nest/newtest has been ignored, because another column treats it as an array or object"
422
        ],
423
        False,
424
        True,
425
    ),
426
    # WARNING: Conflict when merging field "newtest" for id "2" in sheet custom_main: "3"
427
    (
428
        "str / object mixing multiline",
429
        [
430
            OrderedDict(
431
                [
432
                    ("ROOT_ID", 1),
433
                    ("id", 2),
434
                    ("newtest", 3),
435
                ]
436
            ),
437
            OrderedDict(
438
                [
439
                    ("ROOT_ID", 1),
440
                    ("id", 2),
441
                    ("newtest/a", 4),
442
                    ("newtest/b", 5),
443
                ]
444
            ),
445
        ],
446
        [{"ROOT_ID": 1, "id": 2, "newtest": 3}],
447
        [
448
            "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not",
449
            "Column newtest/b has been ignored, because it treats newtest as an object, but another column does not",
450
        ],
451
        False,
452
        True,
453
    ),
454
    (
455
        "object / str mixing multiline",
456
        [
457
            OrderedDict(
458
                [
459
                    ("ROOT_ID", 1),
460
                    ("id", 2),
461
                    ("newtest/a", 4),
462
                ]
463
            ),
464
            OrderedDict(
465
                [
466
                    ("ROOT_ID", 1),
467
                    ("id", 2),
468
                    ("newtest", 3),
469
                ]
470
            ),
471
        ],
472
        [{"ROOT_ID": 1, "id": 2, "newtest": {"a": 4}}],
473
        [
474
            "Column newtest has been ignored, because another column treats it as an array or object"
475
        ],
476
        False,
477
        True,
478
    ),
479
    # Previously this caused the error: KeyError('ocid',)
480
    # Now it works, but probably not as intended
481
    # The missing Root ID should be picked up in schema validation
482
    # (Cove will do this automatically).
483
    (
484
        "Root ID is missing",
485
        [
486
            OrderedDict(
487
                [
488
                    ("id", 2),
489
                    ("testA", 3),
490
                ]
491
            )
492
        ],
493
        [{"id": 2, "testA": 3}],
494
        [],
495
        False,
496
        True,
497
    ),
498
    # We should be able to handle numbers as column headings
499
    (
500
        "Non-string column headings",
501
        [
502
            OrderedDict(
503
                [
504
                    (1, "A"),
505
                    (2, "AA"),
506
                    ("3", "AAA"),
507
                    ("4", "AAAA"),
508
                    (Decimal("2.2"), "B"),
509
                    (2.3, "C"),
510
                    (False, "D"),
511
                ]
512
            )
513
        ],
514
        [
515
            {
516
                "2.2": "B",
517
                "2.3": "C",
518
                "False": "D",
519
            }
520
        ],
521
        [
522
            'Column "1" has been ignored because it is a number.',
523
            'Column "2" has been ignored because it is a number.',
524
            'Column "3" has been ignored because it is a number.',
525
            'Column "4" has been ignored because it is a number.',
526
        ],
527
        False,
528
        True,
529
    ),
530
]
531

532
testdata_geo = [
10✔
533
    (
534
        "WKT->geojson conversion",
535
        [
536
            {
537
                "apoint": "POINT (53.486434 -2.239353)",
538
                "alinestring": "LINESTRING (-0.173 5.626, -0.178 5.807, -0.112 5.971, -0.211 5.963, -0.321 6.17, -0.488 6.29, -0.56 6.421, -0.752 6.533, -0.867 6.607, -1.101 6.585, -1.304 6.623, -1.461 6.727, -1.628 6.713)",
539
            }
540
        ],
541
        [
542
            {
543
                "apoint": {
544
                    "type": "Point",
545
                    "coordinates": [53.486434, -2.239353],
546
                },
547
                "alinestring": {
548
                    "type": "LineString",
549
                    "coordinates": [
550
                        [-0.173, 5.626],
551
                        [-0.178, 5.807],
552
                        [-0.112, 5.971],
553
                        [-0.211, 5.963],
554
                        [-0.321, 6.170],
555
                        [-0.488, 6.290],
556
                        [-0.560, 6.421],
557
                        [-0.752, 6.533],
558
                        [-0.867, 6.607],
559
                        [-1.101, 6.585],
560
                        [-1.304, 6.623],
561
                        [-1.461, 6.727],
562
                        [-1.628, 6.713],
563
                    ],
564
                },
565
            }
566
        ],
567
        [],
568
        True,
569
        False,
570
    ),
571
    (
572
        "Invalid WKT",
573
        [
574
            {
575
                "apoint": "test",
576
                "alinestring": "(",
577
            },
578
            {
579
                "apoint": "POINT(",
580
                "alinestring": "LINESTRING()",
581
            },
582
            {
583
                "apoint": "POINT(1)",
584
                "alinestring": "LINESTRING(1 2 3 4)",
585
            },
586
        ],
587
        [{}, {}, {}],
588
        [
589
            "An invalid WKT string was supplied \"test\", the message from the parser was: ParseException: Unknown type: 'TEST'",
590
            "An invalid WKT string was supplied \"(\", the message from the parser was: ParseException: Unknown type: '('",
591
            'An invalid WKT string was supplied "POINT(", the message from the parser was: ParseException: Expected number but encountered end of stream',
592
            "An invalid WKT string was supplied \"LINESTRING()\", the message from the parser was: ParseException: Expected number but encountered ')'",
593
            "An invalid WKT string was supplied \"POINT(1)\", the message from the parser was: ParseException: Expected number but encountered ')'",
594
            'An invalid WKT string was supplied "LINESTRING(1 2 3 4)", the message from the parser was: IllegalArgumentException: point array must contain 0 or >1 elements\n',
595
        ],
596
        False,
597
        False,
598
    ),
599
]
600

601
# Test cases that require our schema aware JSON pointer logic, so must be run
602
# with the relevant schema
603
testdata_pointer = [
10✔
604
    (
605
        "Single item array without json numbering",
606
        [
607
            {
608
                "ROOT_ID": "1",
609
                "testR/id": "2",
610
                "testR/testB": "3",
611
                "testR/testX": "3",
612
            }
613
        ],
614
        [
615
            {
616
                "ROOT_ID": "1",
617
                "testR": [{"id": "2", "testB": "3", "testX": "3"}],
618
            }
619
        ],
620
        [],
621
    ),
622
    (
623
        "Multi item array one with varied numbering ",
624
        [
625
            {
626
                "ROOT_ID": "1",
627
                "testR/id": "-1",
628
                "testR/testB": "-1",
629
                "testR/testX": "-2",
630
                "testR/0/id": "0",
631
                "testR/0/testB": "1",
632
                "testR/0/testX": "1",
633
                "testR/5/id": "5",
634
                "testR/5/testB": "5",
635
                "testR/5/testX": "6",
636
            }
637
        ],
638
        [
639
            {
640
                "ROOT_ID": "1",
641
                "testR": [
642
                    {"id": "-1", "testB": "-1", "testX": "-2"},
643
                    {"id": "0", "testB": "1", "testX": "1"},
644
                    {"id": "5", "testB": "5", "testX": "6"},
645
                ],
646
            }
647
        ],
648
        [],
649
    ),
650
]
651

652

653
def create_schema(root_id):
    """
    Build the schema dictionary used by the unflatten tests.

    The returned dict has a ``"properties"`` key describing the test fields
    (``id``, ``testA``, ``testB``, ``testArr``, ``testR``, ``testU``,
    ``testSA``, the date fields and the two geometry objects).

    If ``root_id`` is truthy, an entry keyed by ``root_id`` is also added,
    with its title looked up in ``ROOT_ID_TITLES`` and type ``"string"``.
    A new dict is built on every call.

    Raises ``KeyError`` if ``root_id`` is truthy but not a key of
    ``ROOT_ID_TITLES``.
    """
    schema = {
        "properties": {
            "id": {
                "title": "Identifier",
                "type": "integer",
            },
            "testA": {
                "title": "A title",
                "type": "integer",
            },
            "testDateTime": {
                "type": "string",
                "format": "date-time",
            },
            "testDate": {"type": "string", "format": "date"},
            "testB": {
                "title": "B title",
                "type": "object",
                "properties": {
                    "id": {
                        "title": "Identifier",
                        "type": "integer",
                    },
                    "testC": {
                        "title": "C title",
                        "type": "integer",
                    },
                    "testD": {
                        "title": "D title",
                        "type": "integer",
                    },
                    "subField": {
                        "title": "Sub title",
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {
                                    "title": "Identifier",
                                    "type": "integer",
                                },
                                "testE": {
                                    "title": "E title",
                                    "type": "integer",
                                },
                            },
                        },
                    },
                },
            },
            "testArr": {
                "title": "Arr title",
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {
                            "title": "Identifier",
                            "type": "string",
                        },
                        "testB": {
                            "title": "B title",
                            "type": "string",
                        },
                        "testC": {
                            "title": "C title",
                            "type": "string",
                        },
                        "testNest": {
                            "title": "Nest title",
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "id": {
                                        "title": "Identifier",
                                        "type": "string",
                                    },
                                    "testD": {
                                        "title": "D title",
                                        "type": "string",
                                    },
                                },
                            },
                        },
                        "testNestObj": {
                            "title": "NestObj title",
                            "type": "object",
                            "properties": {
                                "id": {
                                    "title": "Identifier",
                                    "type": "string",
                                },
                                "testD": {
                                    "title": "D title",
                                    "type": "string",
                                },
                            },
                        },
                    },
                },
            },
            "testR": {
                "title": "R title",
                "type": "array",
                "rollUp": ["id", "testB"],
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {
                            "title": "Identifier",
                            "type": "string",
                            # 'type': 'integer',
                            # integer does not work, as testB:integer is not
                            # in the rollUp
                        },
                        "testB": {
                            "title": "B title",
                            "type": "string",
                        },
                        "testC": {
                            "title": "C title",
                            "type": "string",
                        },
                        "testSA": {
                            "title": "SA title",
                            "type": "array",
                            "items": {"type": "string"},
                        },
                    },
                },
            },
            "testU": {
                "title": UNICODE_TEST_STRING,
                "type": "string",
            },
            "testSA": {
                "title": "SA title",
                "type": "array",
                "items": {"type": "string"},
            },
            "apoint": {
                "type": "object",
                "properties": {
                    "type": {},
                    "coordinates": {},
                },
            },
            "alinestring": {
                "type": "object",
                "properties": {
                    "type": {},
                    "coordinates": {},
                },
            },
        }
    }
    if root_id:
        # NOTE(review): this adds the root id entry at the top level of the
        # schema dict, as a sibling of "properties", not inside it. Kept
        # as-is because the existing tests run against this shape; confirm
        # whether that placement is intentional before changing it.
        schema.update({root_id: {"title": ROOT_ID_TITLES[root_id], "type": "string"}})
    return schema
10✔
814

815

816
testdata_titles = [
10✔
817
    (
818
        "Basic flat",
819
        [{"ROOT_ID_TITLE": 1, "Identifier": 2, "A title": 3}],
820
        [{"ROOT_ID": 1, "id": 2, "testA": 3}],
821
        [],
822
        True,
823
    ),
824
    (
825
        "Nested",
826
        [
827
            {
828
                "ROOT_ID_TITLE": 1,
829
                "Identifier": 2,
830
                "B title:C title": 3,
831
                "B title:D title": 4,
832
            }
833
        ],
834
        [{"ROOT_ID": 1, "id": 2, "testB": {"testC": 3, "testD": 4}}],
835
        [],
836
        True,
837
    ),
838
    (
839
        "Nested titles should be converted individually",
840
        [
841
            {
842
                "ROOT_ID_TITLE": 1,
843
                "Identifier": 2,
844
                "B title:C title": 3,
845
                "B title:Not in schema": 4,
846
            }
847
        ],
848
        [{"ROOT_ID": 1, "id": 2, "testB": {"testC": 3, "Not in schema": 4}}],
849
        [],
850
        False,
851
    ),
852
    (
853
        "Should be space and case invariant",
854
        [
855
            {
856
                "ROOT_ID_TITLE": 1,
857
                "Identifier": 2,
858
                "B  title : c  title": 3,
859
                "btitle : Not in schema": 4,
860
            }
861
        ],
862
        [{"ROOT_ID": 1, "id": 2, "testB": {"testC": 3, "Not in schema": 4}}],
863
        [],
864
        False,
865
    ),
866
    (
867
        "Unicode",
868
        [
869
            {
870
                "ROOT_ID_TITLE": UNICODE_TEST_STRING,
871
                UNICODE_TEST_STRING: UNICODE_TEST_STRING,
872
            }
873
        ],
874
        [{"ROOT_ID": UNICODE_TEST_STRING, "testU": UNICODE_TEST_STRING}],
875
        [],
876
        True,
877
    ),
878
    (
879
        "Single item array",
880
        [
881
            {
882
                "ROOT_ID_TITLE": 1,
883
                "Identifier": 2,
884
                "R title:Identifier": 3,
885
                "R title:B title": 4,
886
            }
887
        ],
888
        [
889
            {
890
                "ROOT_ID": 1,
891
                "id": 2,
892
                "testR": [{"id": "3", "testB": "4"}],
893
            }
894
        ],
895
        [],
896
        False,
897
    ),
898
    (
899
        "Single item array without parent ID",
900
        [{"ROOT_ID_TITLE": "1", "R title:Identifier": "2", "R title:B title": "3"}],
901
        [
902
            {
903
                "ROOT_ID": "1",
904
                "testR": [{"id": "2", "testB": "3"}],
905
            }
906
        ],
907
        [],
908
        False,
909
    ),
910
    (
911
        """
912
        Properties of a single item array shouldn't need to be in rollUp list
913
        for their titles to be converted
914
        """,
915
        [
916
            {
917
                "ROOT_ID_TITLE": 1,
918
                "Identifier": 2,
919
                "R title:Identifier": 3,
920
                "R title:C title": 4,
921
            }
922
        ],
923
        [
924
            {
925
                "ROOT_ID": 1,
926
                "id": 2,
927
                "testR": [{"id": "3", "testC": "4"}],
928
            }
929
        ],
930
        [],
931
        False,
932
    ),
933
    (
934
        "Single item array, titles should be converted individually",
935
        [
936
            {
937
                "ROOT_ID_TITLE": 1,
938
                "Identifier": 2,
939
                "R title:C title": 3,
940
                "R title:Not in schema": 4,
941
            }
942
        ],
943
        [
944
            {
945
                "ROOT_ID": 1,
946
                "id": 2,
947
                "testR": [{"testC": "3", "Not in schema": 4}],
948
            }
949
        ],
950
        [],
951
        False,
952
    ),
953
    (
954
        "Multi item array, allow numbering",
955
        [
956
            {
957
                "ROOT_ID_TITLE": 1,
958
                "Identifier": 2,
959
                "R title:C title": 3,
960
                "R title:Not in schema": 4,
961
                "R title:0:C title": 5,
962
                "R title:0:Not in schema": 6,
963
                "R title:5:C title": 7,
964
                "R title:5:Not in schema": 8,
965
            }
966
        ],
967
        [
968
            {
969
                "ROOT_ID": 1,
970
                "id": 2,
971
                "testR": [
972
                    {"testC": "3", "Not in schema": 4},
973
                    {"testC": "5", "Not in schema": 6},
974
                    {"testC": "7", "Not in schema": 8},
975
                ],
976
            }
977
        ],
978
        [],
979
        False,
980
    ),
981
    (
982
        "Empty",
983
        [
984
            {
985
                "ROOT_ID_TITLE": "",
986
                "Identifier": "",
987
                "A title": "",
988
                "B title": "",
989
                "C title": "",
990
                "D title": "",
991
                "E title": "",
992
            }
993
        ],
994
        [],
995
        [],
996
        False,
997
    ),
998
    (
999
        "Empty except for root id",
1000
        [
1001
            {
1002
                "ROOT_ID_TITLE": 1,
1003
                "Identifier": "",
1004
                "A title": "",
1005
                "B title": "",
1006
                "C title": "",
1007
                "D title": "",
1008
                "E title": "",
1009
            }
1010
        ],
1011
        [{"ROOT_ID": 1}],
1012
        [],
1013
        False,
1014
    ),
1015
    (
1016
        "Test arrays of strings (1 item)",
1017
        [
1018
            {
1019
                "ROOT_ID_TITLE": 1,
1020
                "Identifier": 2,
1021
                "SA title": "a",
1022
            }
1023
        ],
1024
        [
1025
            {
1026
                "ROOT_ID": 1,
1027
                "id": 2,
1028
                "testSA": ["a"],
1029
            }
1030
        ],
1031
        [],
1032
        True,
1033
    ),
1034
    (
1035
        "Test arrays of strings (2 items)",
1036
        [
1037
            {
1038
                "ROOT_ID_TITLE": 1,
1039
                "Identifier": 2,
1040
                "SA title": "a;b",
1041
            }
1042
        ],
1043
        [
1044
            {
1045
                "ROOT_ID": 1,
1046
                "id": 2,
1047
                "testSA": ["a", "b"],
1048
            }
1049
        ],
1050
        [],
1051
        True,
1052
    ),
1053
    (
1054
        "Test arrays of strings within an object array (1 item)",
1055
        [
1056
            {
1057
                "ROOT_ID_TITLE": 1,
1058
                "Identifier": 2,
1059
                "R title:SA title": "a",
1060
            }
1061
        ],
1062
        [
1063
            {
1064
                "ROOT_ID": 1,
1065
                "id": 2,
1066
                "testR": [
1067
                    {
1068
                        "testSA": ["a"],
1069
                    }
1070
                ],
1071
            }
1072
        ],
1073
        [],
1074
        False,
1075
    ),
1076
    (
1077
        "Test arrays of strings within an object array (2 items)",
1078
        [
1079
            {
1080
                "ROOT_ID_TITLE": 1,
1081
                "Identifier": 2,
1082
                "R title:SA title": "a;b",
1083
            }
1084
        ],
1085
        [
1086
            {
1087
                "ROOT_ID": 1,
1088
                "id": 2,
1089
                "testR": [
1090
                    {
1091
                        "testSA": ["a", "b"],
1092
                    }
1093
                ],
1094
            }
1095
        ],
1096
        [],
1097
        False,
1098
    ),
1099
]
1100

1101

1102
# Each entry is (root_id, root_id_kwargs): the root id name the test data is
# rewritten to use, and the keyword arguments handed to the input class.
ROOT_ID_PARAMS = [
    ("ocid", {}),  # If no root_id kwarg is passed, then a root_id of ocid is assumed
    ("ocid", {"root_id": "ocid"}),
    ("custom", {"root_id": "custom"}),
    ("", {"root_id": ""}),
]
1108

1109
# Since we're not using titles, and titles mode should fall back to assuming
# we've supplied a fieldname, we should be able to run this test with
# convert_titles and use_schema as True or False
@pytest.mark.parametrize("convert_titles", [True, False])
@pytest.mark.parametrize("use_schema", [True, False])
@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS)
@pytest.mark.parametrize(
    "comment,input_list,expected_output_list,warning_messages,reversible,works_without_schema",
    testdata,
)
def test_unflatten(
    convert_titles,
    use_schema,
    root_id,
    root_id_kwargs,
    input_list,
    expected_output_list,
    recwarn,
    comment,
    warning_messages,
    reversible,
    works_without_schema,
):
    """
    Run every case in testdata against each combination of convert_titles,
    use_schema and root id. All the work is done by _test_unflatten_worker.
    """
    # Forward by keyword: with this many parameters a positional call is easy
    # to get silently out of order.
    _test_unflatten_worker(
        convert_titles=convert_titles,
        use_schema=use_schema,
        root_id=root_id,
        root_id_kwargs=root_id_kwargs,
        input_list=input_list,
        expected_output_list=expected_output_list,
        recwarn=recwarn,
        comment=comment,
        warning_messages=warning_messages,
        reversible=reversible,
        works_without_schema=works_without_schema,
    )
1145

1146

1147
@pytest.mark.parametrize("convert_titles", [True, False])
@pytest.mark.parametrize("use_schema", [True, False])
@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS)
@pytest.mark.parametrize(
    "comment,input_list,expected_output_list,warning_messages,reversible,works_without_schema",
    testdata_geo,
)
@pytest.mark.geo
def test_unflatten_geo(
    convert_titles,
    use_schema,
    root_id,
    root_id_kwargs,
    input_list,
    expected_output_list,
    recwarn,
    comment,
    warning_messages,
    reversible,
    works_without_schema,
):
    """
    Same as test_unflatten, but driven by testdata_geo and tagged with the
    "geo" marker so these cases can be selected or deselected as a group.
    """
    # Forward by keyword: with this many parameters a positional call is easy
    # to get silently out of order.
    _test_unflatten_worker(
        convert_titles=convert_titles,
        use_schema=use_schema,
        root_id=root_id,
        root_id_kwargs=root_id_kwargs,
        input_list=input_list,
        expected_output_list=expected_output_list,
        recwarn=recwarn,
        comment=comment,
        warning_messages=warning_messages,
        reversible=reversible,
        works_without_schema=works_without_schema,
    )
1181

1182

1183
def _test_unflatten_worker(
    convert_titles,
    use_schema,
    root_id,
    root_id_kwargs,
    input_list,
    expected_output_list,
    recwarn,
    comment,
    warning_messages,
    reversible,
    works_without_schema,
):
    """
    Shared body of the unflatten tests.

    Builds a ListInput with a single "custom_main" sheet from input_list,
    attaches a SchemaParser (using create_schema(root_id) when use_schema is
    true, otherwise a schema with no properties), and asserts that
    unflatten() yields expected_output_list. The ROOT_ID / ROOT_ID_TITLE
    placeholder keys in both the input and the expected output are rewritten
    for root_id by inject_root_id.

    The case is skipped when it is run without a schema but is flagged as not
    working without one. The "wkt" convert flag is switched on for cases whose
    comment contains "WKT". Warnings are only compared against
    warning_messages when convert_titles is false. reversible is accepted so
    the signature lines up with the test data tuples; it is not used here.
    """
    if not (use_schema or works_without_schema):
        pytest.skip()

    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings

    warnings.simplefilter("always")

    convert_flags = {"wkt": "WKT" in comment}

    extra_kwargs = {"convert_titles": convert_titles, "convert_flags": convert_flags}
    extra_kwargs.update(root_id_kwargs)
    main_sheet_rows = [inject_root_id(root_id, input_row) for input_row in input_list]
    spreadsheet_input = ListInput(sheets={"custom_main": main_sheet_rows}, **extra_kwargs)
    spreadsheet_input.read_sheets()

    if use_schema:
        root_schema_dict = create_schema(root_id)
    else:
        root_schema_dict = {"properties": {}}
    parser = SchemaParser(
        root_schema_dict=root_schema_dict,
        root_id=root_id,
        rollup=True,
        convert_flags=convert_flags,
    )
    parser.parse()
    spreadsheet_input.parser = parser

    expected_output_list = [
        inject_root_id(root_id, expected_output_dict)
        for expected_output_dict in expected_output_list
    ]
    if expected_output_list == [{}]:
        # Dropping the root id placeholder (empty root_id) can leave a lone
        # empty dictionary; we don't expect an empty dictionary in the output
        expected_output_list = []
    assert list(spreadsheet_input.unflatten()) == expected_output_list
    # We expect no warning_messages
    if not convert_titles:  # TODO what are the warning_messages here
        assert [str(x.message) for x in recwarn.list] == warning_messages
10✔
1239

1240

1241
@pytest.mark.parametrize("convert_titles", [True, False])
@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS)
@pytest.mark.parametrize(
    "comment,input_list,expected_output_list,warning_messages", testdata_pointer
)
def test_unflatten_pointer(
    convert_titles,
    root_id,
    root_id_kwargs,
    input_list,
    expected_output_list,
    recwarn,
    comment,
    warning_messages,
):
    """
    Run the testdata_pointer cases with the schema always enabled, for each
    convert_titles value and root id.
    """
    # Call the shared worker directly rather than another test function, and
    # don't return its result: a test function should return None.
    _test_unflatten_worker(
        convert_titles=convert_titles,
        use_schema=True,
        root_id=root_id,
        root_id_kwargs=root_id_kwargs,
        input_list=input_list,
        expected_output_list=expected_output_list,
        recwarn=recwarn,
        comment=comment,
        warning_messages=warning_messages,
        reversible=False,
        works_without_schema=True,
    )
1269

1270

1271
@pytest.mark.parametrize(
    "comment,input_list,expected_output_list,warning_messages,reversible",
    testdata_titles,
)
@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS)
def test_unflatten_titles(
    root_id,
    root_id_kwargs,
    input_list,
    expected_output_list,
    recwarn,
    comment,
    warning_messages,
    reversible,
):
    """
    Essentially the same as test unflatten, except that convert_titles and
    use_schema are always true, as both of these are needed to convert titles
    properly. (and runs with different test data).
    """
    if root_id != "":
        # Skip all tests with a root ID for now, as this is broken
        # https://github.com/OpenDataServices/flatten-tool/issues/84
        pytest.skip()
    # Call the shared worker directly rather than another test function, and
    # don't return its result: a test function should return None.
    _test_unflatten_worker(
        convert_titles=True,
        use_schema=True,
        root_id=root_id,
        root_id_kwargs=root_id_kwargs,
        input_list=input_list,
        expected_output_list=expected_output_list,
        recwarn=recwarn,
        comment=comment,
        warning_messages=warning_messages,
        reversible=reversible,
        works_without_schema=True,
    )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc