• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenDataServices / flatten-tool / 6507626273

13 Oct 2023 11:25AM UTC coverage: 42.006% (-53.7%) from 95.72%
6507626273

Pull #433

github

odscjames
New "Geo" optional dependencies

https://github.com/OpenDataServices/flatten-tool/issues/424
Pull Request #433: New "Geo" optional dependencies

38 of 38 new or added lines in 6 files covered. (100.0%)

1466 of 3490 relevant lines covered (42.01%)

4.16 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

25.0
/flattentool/tests/test_init.py
1
# -*- coding: utf-8 -*-
2
from __future__ import unicode_literals
10✔
3

4
import json
10✔
5
from decimal import Decimal
10✔
6

7
import pytest
10✔
8

9
from flattentool import decimal_default, unflatten
10✔
10

11

12
def original_cell_and_row_locations(data):
    """
    Summarise the cell and row locations recorded in a cell source map.

    Cells should each appear only once

    Rows should appear the number of times a column in it resolves to a unique dictionary

    ``data`` maps each key to a list of location entries. An entry with
    exactly two items is treated as a row and rendered as "a:b"; any other
    entry is treated as a cell and rendered from its first three items as
    "a:bc".

    Returns a tuple ``(cells, rows)`` where ``cells`` is the sorted list of
    cell strings and ``rows`` maps each row string to the number of times it
    was seen.

    Raises AssertionError if the same cell string is seen more than once.
    """
    # A set keeps the "seen before?" check O(1) instead of scanning a list.
    seen_cells = set()
    rows = {}
    for cell_list in data.values():
        for cell in cell_list:
            if len(cell) == 2:
                # This is a row
                row_str = "{}:{}".format(cell[0], cell[1])
                rows[row_str] = rows.get(row_str, 0) + 1
            else:
                # This is a cell
                cell_str = "{}:{}{}".format(cell[0], cell[1], cell[2])
                assert cell_str not in seen_cells
                seen_cells.add(cell_str)
    # Every entry is unique, so sorting the set gives the same list the
    # original append-then-sort produced.
    return sorted(seen_cells), rows
×
37

38

39
def original_headings(heading_data):
    """
    Collect every heading entry of a heading source map as a sorted list.

    ``heading_data`` maps each key to a list of two-item entries. Each entry
    is rendered as "a:b" from its two items.

    Returns the sorted list of those strings.

    Raises AssertionError if an entry does not have exactly two items, or if
    the same rendered string is seen more than once.
    """
    # A set keeps the duplicate check O(1) instead of scanning a list.
    seen_headings = set()
    for cell_list in heading_data.values():
        for cell in cell_list:
            assert len(cell) == 2
            heading_str = "{}:{}".format(cell[0], cell[1])
            assert heading_str not in seen_headings
            seen_headings.add(heading_str)
    # Entries are unique, so this matches the original append-then-sort list.
    return sorted(seen_headings)
×
52

53

54
def test_decimal_default():
    """Decimals serialised with ``decimal_default`` come out as bare JSON numbers."""
    as_one_point_two = json.dumps(Decimal("1.2"), default=decimal_default)
    assert as_one_point_two == "1.2"
    as_forty_two = json.dumps(Decimal("42"), default=decimal_default)
    assert as_forty_two == "42"
×
57

58

59
def lines_strip_whitespace(text):
    """Return ``text`` with leading and trailing whitespace removed from every line.

    The text is split on "\\n" and re-joined with "\\n", so the number of
    lines is unchanged.
    """
    stripped = [piece.strip() for piece in text.split("\n")]
    return "\n".join(stripped)
×
62

63

64
def test_unflatten(tmpdir):
10✔
65
    """
66
    Perform a full CSV unflattening, and check the output is what we expect.
67

68
    Notable things we are checking for:
69
        Ordering is preserved - both the order of columns and rows
70
    """
71
    input_dir = tmpdir.ensure("release_input", dir=True)
×
72
    input_dir.join("main.csv").write(
×
73
        "ocid,id,testA,test/id,test/C\n"
74
        "1,2,3,4,5\n"
75
        "1,2a,3a,4a,5a\n"
76
        "6,7,8,9,10\n"
77
        "6,7a,8a,9a,10a\n"
78
    )
79
    input_dir.join("subsheet.csv").write(
×
80
        "ocid,id,sub/0/id,sub/0/testD,sub/0/test2/E,sub/0/test2/F\n"
81
        "1,2,S1,11,12,13\n"
82
        "1,2a,S1,14,15,16\n"
83
        "1,2,S2,17,18,19\n"
84
        "6,7,S1,20,21,22\n"
85
    )
86
    input_dir.join("subsheet_test.csv").write(
×
87
        "ocid,id,test/id,test/subsheet/0/id,test/subsheet/0/testD,test/subsheet/0/test2/E,test/subsheet/0/test2/F\n"
88
        "1,2,4,S3,24,25,26\n"
89
    )
90
    input_dir.join("subsubsheet.csv").write(
×
91
        "ocid,id,sub/0/id,sub/0/subsub/0/testG\n" "1,2,S1,23\n"
92
    )
93
    unflatten(
×
94
        input_dir.strpath,
95
        input_format="csv",
96
        output_name=tmpdir.join("release.json").strpath,
97
        main_sheet_name="main",
98
        cell_source_map=tmpdir.join("cell_source_map.json").strpath,
99
        heading_source_map=tmpdir.join("heading_source_map.json").strpath,
100
    )
101
    # Note, "main/0/testA" comes after the "main/0/test/..." keys because 'testA' > 'test/'
102
    # Note also that all the row entries come after the cell ones
103
    expected = """{
×
104
        "main/0/id": [
105
            [
106
                "main",
107
                "B",
108
                2,
109
                "id"
110
            ],
111
            [
112
                "subsheet",
113
                "B",
114
                2,
115
                "id"
116
            ],
117
            [
118
                "subsheet",
119
                "B",
120
                4,
121
                "id"
122
            ],
123
            [
124
                "subsheet_test",
125
                "B",
126
                2,
127
                "id"
128
            ],
129
            [
130
                "subsubsheet",
131
                "B",
132
                2,
133
                "id"
134
            ]
135
        ],
136
        "main/0/ocid": [
137
            [
138
                "main",
139
                "A",
140
                2,
141
                "ocid"
142
            ],
143
            [
144
                "subsheet",
145
                "A",
146
                2,
147
                "ocid"
148
            ],
149
            [
150
                "subsheet",
151
                "A",
152
                4,
153
                "ocid"
154
            ],
155
            [
156
                "subsheet_test",
157
                "A",
158
                2,
159
                "ocid"
160
            ],
161
            [
162
                "subsubsheet",
163
                "A",
164
                2,
165
                "ocid"
166
            ]
167
        ],
168
        "main/0/sub/0/id": [
169
            [
170
                "subsheet",
171
                "C",
172
                2,
173
                "sub/0/id"
174
            ],
175
            [
176
                "subsubsheet",
177
                "C",
178
                2,
179
                "sub/0/id"
180
            ]
181
        ],
182
        "main/0/sub/0/subsub/0/testG": [
183
            [
184
                "subsubsheet",
185
                "D",
186
                2,
187
                "sub/0/subsub/0/testG"
188
            ]
189
        ],
190
        "main/0/sub/0/test2/E": [
191
            [
192
                "subsheet",
193
                "E",
194
                2,
195
                "sub/0/test2/E"
196
            ]
197
        ],
198
        "main/0/sub/0/test2/F": [
199
            [
200
                "subsheet",
201
                "F",
202
                2,
203
                "sub/0/test2/F"
204
            ]
205
        ],
206
        "main/0/sub/0/testD": [
207
            [
208
                "subsheet",
209
                "D",
210
                2,
211
                "sub/0/testD"
212
            ]
213
        ],
214
        "main/0/sub/1/id": [
215
            [
216
                "subsheet",
217
                "C",
218
                4,
219
                "sub/0/id"
220
            ]
221
        ],
222
        "main/0/sub/1/test2/E": [
223
            [
224
                "subsheet",
225
                "E",
226
                4,
227
                "sub/0/test2/E"
228
            ]
229
        ],
230
        "main/0/sub/1/test2/F": [
231
            [
232
                "subsheet",
233
                "F",
234
                4,
235
                "sub/0/test2/F"
236
            ]
237
        ],
238
        "main/0/sub/1/testD": [
239
            [
240
                "subsheet",
241
                "D",
242
                4,
243
                "sub/0/testD"
244
            ]
245
        ],
246
        "main/0/test/C": [
247
            [
248
                "main",
249
                "E",
250
                2,
251
                "test/C"
252
            ]
253
        ],
254
        "main/0/test/id": [
255
            [
256
                "main",
257
                "D",
258
                2,
259
                "test/id"
260
            ],
261
            [
262
                "subsheet_test",
263
                "C",
264
                2,
265
                "test/id"
266
            ]
267
        ],
268
        "main/0/test/subsheet/0/id": [
269
            [
270
                "subsheet_test",
271
                "D",
272
                2,
273
                "test/subsheet/0/id"
274
            ]
275
        ],
276
        "main/0/test/subsheet/0/test2/E": [
277
            [
278
                "subsheet_test",
279
                "F",
280
                2,
281
                "test/subsheet/0/test2/E"
282
            ]
283
        ],
284
        "main/0/test/subsheet/0/test2/F": [
285
            [
286
                "subsheet_test",
287
                "G",
288
                2,
289
                "test/subsheet/0/test2/F"
290
            ]
291
        ],
292
        "main/0/test/subsheet/0/testD": [
293
            [
294
                "subsheet_test",
295
                "E",
296
                2,
297
                "test/subsheet/0/testD"
298
            ]
299
        ],
300
        "main/0/testA": [
301
            [
302
                "main",
303
                "C",
304
                2,
305
                "testA"
306
            ]
307
        ],
308
        "main/1/id": [
309
            [
310
                "main",
311
                "B",
312
                3,
313
                "id"
314
            ],
315
            [
316
                "subsheet",
317
                "B",
318
                3,
319
                "id"
320
            ]
321
        ],
322
        "main/1/ocid": [
323
            [
324
                "main",
325
                "A",
326
                3,
327
                "ocid"
328
            ],
329
            [
330
                "subsheet",
331
                "A",
332
                3,
333
                "ocid"
334
            ]
335
        ],
336
        "main/1/sub/0/id": [
337
            [
338
                "subsheet",
339
                "C",
340
                3,
341
                "sub/0/id"
342
            ]
343
        ],
344
        "main/1/sub/0/test2/E": [
345
            [
346
                "subsheet",
347
                "E",
348
                3,
349
                "sub/0/test2/E"
350
            ]
351
        ],
352
        "main/1/sub/0/test2/F": [
353
            [
354
                "subsheet",
355
                "F",
356
                3,
357
                "sub/0/test2/F"
358
            ]
359
        ],
360
        "main/1/sub/0/testD": [
361
            [
362
                "subsheet",
363
                "D",
364
                3,
365
                "sub/0/testD"
366
            ]
367
        ],
368
        "main/1/test/C": [
369
            [
370
                "main",
371
                "E",
372
                3,
373
                "test/C"
374
            ]
375
        ],
376
        "main/1/test/id": [
377
            [
378
                "main",
379
                "D",
380
                3,
381
                "test/id"
382
            ]
383
        ],
384
        "main/1/testA": [
385
            [
386
                "main",
387
                "C",
388
                3,
389
                "testA"
390
            ]
391
        ],
392
        "main/2/id": [
393
            [
394
                "main",
395
                "B",
396
                4,
397
                "id"
398
            ],
399
            [
400
                "subsheet",
401
                "B",
402
                5,
403
                "id"
404
            ]
405
        ],
406
        "main/2/ocid": [
407
            [
408
                "main",
409
                "A",
410
                4,
411
                "ocid"
412
            ],
413
            [
414
                "subsheet",
415
                "A",
416
                5,
417
                "ocid"
418
            ]
419
        ],
420
        "main/2/sub/0/id": [
421
            [
422
                "subsheet",
423
                "C",
424
                5,
425
                "sub/0/id"
426
            ]
427
        ],
428
        "main/2/sub/0/test2/E": [
429
            [
430
                "subsheet",
431
                "E",
432
                5,
433
                "sub/0/test2/E"
434
            ]
435
        ],
436
        "main/2/sub/0/test2/F": [
437
            [
438
                "subsheet",
439
                "F",
440
                5,
441
                "sub/0/test2/F"
442
            ]
443
        ],
444
        "main/2/sub/0/testD": [
445
            [
446
                "subsheet",
447
                "D",
448
                5,
449
                "sub/0/testD"
450
            ]
451
        ],
452
        "main/2/test/C": [
453
            [
454
                "main",
455
                "E",
456
                4,
457
                "test/C"
458
            ]
459
        ],
460
        "main/2/test/id": [
461
            [
462
                "main",
463
                "D",
464
                4,
465
                "test/id"
466
            ]
467
        ],
468
        "main/2/testA": [
469
            [
470
                "main",
471
                "C",
472
                4,
473
                "testA"
474
            ]
475
        ],
476
        "main/3/id": [
477
            [
478
                "main",
479
                "B",
480
                5,
481
                "id"
482
            ]
483
        ],
484
        "main/3/ocid": [
485
            [
486
                "main",
487
                "A",
488
                5,
489
                "ocid"
490
            ]
491
        ],
492
        "main/3/test/C": [
493
            [
494
                "main",
495
                "E",
496
                5,
497
                "test/C"
498
            ]
499
        ],
500
        "main/3/test/id": [
501
            [
502
                "main",
503
                "D",
504
                5,
505
                "test/id"
506
            ]
507
        ],
508
        "main/3/testA": [
509
            [
510
                "main",
511
                "C",
512
                5,
513
                "testA"
514
            ]
515
        ],
516
        "main/0": [
517
            [
518
                "main",
519
                2
520
            ],
521
            [
522
                "subsheet",
523
                2
524
            ],
525
            [
526
                "subsheet",
527
                4
528
            ],
529
            [
530
                "subsheet_test",
531
                2
532
            ],
533
            [
534
                "subsubsheet",
535
                2
536
            ]
537
        ],
538
        "main/0/sub/0": [
539
            [
540
                "subsheet",
541
                2
542
            ],
543
            [
544
                "subsubsheet",
545
                2
546
            ]
547
        ],
548
        "main/0/sub/0/subsub/0": [
549
            [
550
                "subsubsheet",
551
                2
552
            ]
553
        ],
554
        "main/0/sub/0/test2": [
555
            [
556
                "subsheet",
557
                2
558
            ]
559
        ],
560
        "main/0/sub/1": [
561
            [
562
                "subsheet",
563
                4
564
            ]
565
        ],
566
        "main/0/sub/1/test2": [
567
            [
568
                "subsheet",
569
                4
570
            ]
571
        ],
572
        "main/0/test": [
573
            [
574
                "main",
575
                2
576
            ],
577
            [
578
                "subsheet_test",
579
                2
580
            ]
581
        ],
582
        "main/0/test/subsheet/0": [
583
            [
584
                "subsheet_test",
585
                2
586
            ]
587
        ],
588
        "main/0/test/subsheet/0/test2": [
589
            [
590
                "subsheet_test",
591
                2
592
            ]
593
        ],
594
        "main/1": [
595
            [
596
                "main",
597
                3
598
            ],
599
            [
600
                "subsheet",
601
                3
602
            ]
603
        ],
604
        "main/1/sub/0": [
605
            [
606
                "subsheet",
607
                3
608
            ]
609
        ],
610
        "main/1/sub/0/test2": [
611
            [
612
                "subsheet",
613
                3
614
            ]
615
        ],
616
        "main/1/test": [
617
            [
618
                "main",
619
                3
620
            ]
621
        ],
622
        "main/2": [
623
            [
624
                "main",
625
                4
626
            ],
627
            [
628
                "subsheet",
629
                5
630
            ]
631
        ],
632
        "main/2/sub/0": [
633
            [
634
                "subsheet",
635
                5
636
            ]
637
        ],
638
        "main/2/sub/0/test2": [
639
            [
640
                "subsheet",
641
                5
642
            ]
643
        ],
644
        "main/2/test": [
645
            [
646
                "main",
647
                4
648
            ]
649
        ],
650
        "main/3": [
651
            [
652
                "main",
653
                5
654
            ]
655
        ],
656
        "main/3/test": [
657
            [
658
                "main",
659
                5
660
            ]
661
        ]
662
    }"""
663
    assert lines_strip_whitespace(
×
664
        tmpdir.join("cell_source_map.json").read()
665
    ) == lines_strip_whitespace(expected)
666
    data = json.loads(expected)
×
667
    cells, rows = original_cell_and_row_locations(data)
×
668
    # Make sure every cell in the original appeared in the cell source map exactly once
669
    assert cells == [
×
670
        "main:A2",
671
        "main:A3",
672
        "main:A4",
673
        "main:A5",
674
        "main:B2",
675
        "main:B3",
676
        "main:B4",
677
        "main:B5",
678
        "main:C2",
679
        "main:C3",
680
        "main:C4",
681
        "main:C5",
682
        "main:D2",
683
        "main:D3",
684
        "main:D4",
685
        "main:D5",
686
        "main:E2",
687
        "main:E3",
688
        "main:E4",
689
        "main:E5",
690
        "subsheet:A2",
691
        "subsheet:A3",
692
        "subsheet:A4",
693
        "subsheet:A5",
694
        "subsheet:B2",
695
        "subsheet:B3",
696
        "subsheet:B4",
697
        "subsheet:B5",
698
        "subsheet:C2",
699
        "subsheet:C3",
700
        "subsheet:C4",
701
        "subsheet:C5",
702
        "subsheet:D2",
703
        "subsheet:D3",
704
        "subsheet:D4",
705
        "subsheet:D5",
706
        "subsheet:E2",
707
        "subsheet:E3",
708
        "subsheet:E4",
709
        "subsheet:E5",
710
        "subsheet:F2",
711
        "subsheet:F3",
712
        "subsheet:F4",
713
        "subsheet:F5",
714
        "subsheet_test:A2",
715
        "subsheet_test:B2",
716
        "subsheet_test:C2",
717
        "subsheet_test:D2",
718
        "subsheet_test:E2",
719
        "subsheet_test:F2",
720
        "subsheet_test:G2",
721
        "subsubsheet:A2",
722
        "subsubsheet:B2",
723
        "subsubsheet:C2",
724
        "subsubsheet:D2",
725
    ]
726
    # Make sure every row in the original appeared the number of times a column in it resolves to a unique dictionary
727
    assert rows == {
×
728
        "main:2": 2,
729
        "main:3": 2,
730
        "main:4": 2,
731
        "main:5": 2,
732
        "subsheet:2": 3,
733
        "subsheet:3": 3,
734
        "subsheet:4": 3,
735
        "subsheet:5": 3,
736
        "subsheet_test:2": 4,
737
        "subsubsheet:2": 3,
738
    }
739
    # TODO Check column names with a JSON schema
740
    expected_headings = """{
×
741
        "main/id": [
742
            [
743
                "main",
744
                "id"
745
            ],
746
            [
747
                "subsheet",
748
                "id"
749
            ],
750
            [
751
                "subsheet_test",
752
                "id"
753
            ],
754
            [
755
                "subsubsheet",
756
                "id"
757
            ]
758
        ],
759
        "main/ocid": [
760
            [
761
                "main",
762
                "ocid"
763
            ],
764
            [
765
                "subsheet",
766
                "ocid"
767
            ],
768
            [
769
                "subsheet_test",
770
                "ocid"
771
            ],
772
            [
773
                "subsubsheet",
774
                "ocid"
775
            ]
776
        ],
777
        "main/sub/id": [
778
            [
779
                "subsheet",
780
                "sub/0/id"
781
            ],
782
            [
783
                "subsubsheet",
784
                "sub/0/id"
785
            ]
786
        ],
787
        "main/sub/subsub/testG": [
788
            [
789
                "subsubsheet",
790
                "sub/0/subsub/0/testG"
791
            ]
792
        ],
793
        "main/sub/test2/E": [
794
            [
795
                "subsheet",
796
                "sub/0/test2/E"
797
            ]
798
        ],
799
        "main/sub/test2/F": [
800
            [
801
                "subsheet",
802
                "sub/0/test2/F"
803
            ]
804
        ],
805
        "main/sub/testD": [
806
            [
807
                "subsheet",
808
                "sub/0/testD"
809
            ]
810
        ],
811
        "main/test/C": [
812
            [
813
                "main",
814
                "test/C"
815
            ]
816
        ],
817
        "main/test/id": [
818
            [
819
                "main",
820
                "test/id"
821
            ],
822
            [
823
                "subsheet_test",
824
                "test/id"
825
            ]
826
        ],
827
        "main/test/subsheet/id": [
828
            [
829
                "subsheet_test",
830
                "test/subsheet/0/id"
831
            ]
832
        ],
833
        "main/test/subsheet/test2/E": [
834
            [
835
                "subsheet_test",
836
                "test/subsheet/0/test2/E"
837
            ]
838
        ],
839
        "main/test/subsheet/test2/F": [
840
            [
841
                "subsheet_test",
842
                "test/subsheet/0/test2/F"
843
            ]
844
        ],
845
        "main/test/subsheet/testD": [
846
            [
847
                "subsheet_test",
848
                "test/subsheet/0/testD"
849
            ]
850
        ],
851
        "main/testA": [
852
            [
853
                "main",
854
                "testA"
855
            ]
856
        ]
857
    }"""
858
    assert lines_strip_whitespace(
×
859
        tmpdir.join("heading_source_map.json").read()
860
    ) == lines_strip_whitespace(expected_headings)
861
    heading_data = json.loads(expected_headings)
×
862
    headings = original_headings(heading_data)
×
863
    # Make sure every heading in the original appeared in the heading source map exactly once
864
    assert headings == [
×
865
        "main:id",
866
        "main:ocid",
867
        "main:test/C",
868
        "main:test/id",
869
        "main:testA",
870
        "subsheet:id",
871
        "subsheet:ocid",
872
        "subsheet:sub/0/id",
873
        "subsheet:sub/0/test2/E",
874
        "subsheet:sub/0/test2/F",
875
        "subsheet:sub/0/testD",
876
        "subsheet_test:id",
877
        "subsheet_test:ocid",
878
        "subsheet_test:test/id",
879
        "subsheet_test:test/subsheet/0/id",
880
        "subsheet_test:test/subsheet/0/test2/E",
881
        "subsheet_test:test/subsheet/0/test2/F",
882
        "subsheet_test:test/subsheet/0/testD",
883
        "subsubsheet:id",
884
        "subsubsheet:ocid",
885
        "subsubsheet:sub/0/id",
886
        "subsubsheet:sub/0/subsub/0/testG",
887
    ]
888
    assert lines_strip_whitespace(
×
889
        tmpdir.join("release.json").read()
890
    ) == lines_strip_whitespace(
891
        """{
892
    "main": [
893
        {
894
            "ocid": "1",
895
            "id": "2",
896
            "testA": "3",
897
            "test": {
898
                "id": "4",
899
                "C": "5",
900
                "subsheet": [
901
                    {
902
                        "id": "S3",
903
                        "testD": "24",
904
                        "test2": {
905
                            "E": "25",
906
                            "F": "26"
907
                        }
908
                    }
909
                ]
910
            },
911
            "sub": [
912
                {
913
                    "id": "S1",
914
                    "testD": "11",
915
                    "test2": {
916
                        "E": "12",
917
                        "F": "13"
918
                    },
919
                    "subsub": [
920
                        {
921
                            "testG": "23"
922
                        }
923
                    ]
924
                },
925
                {
926
                    "id": "S2",
927
                    "testD": "17",
928
                    "test2": {
929
                        "E": "18",
930
                        "F": "19"
931
                    }
932
                }
933
            ]
934
        },
935
        {
936
            "ocid": "1",
937
            "id": "2a",
938
            "testA": "3a",
939
            "test": {
940
                "id": "4a",
941
                "C": "5a"
942
            },
943
            "sub": [
944
                {
945
                    "id": "S1",
946
                    "testD": "14",
947
                    "test2": {
948
                        "E": "15",
949
                        "F": "16"
950
                    }
951
                }
952
            ]
953
        },
954
        {
955
            "ocid": "6",
956
            "id": "7",
957
            "testA": "8",
958
            "test": {
959
                "id": "9",
960
                "C": "10"
961
            },
962
            "sub": [
963
                {
964
                    "id": "S1",
965
                    "testD": "20",
966
                    "test2": {
967
                        "E": "21",
968
                        "F": "22"
969
                    }
970
                }
971
            ]
972
        },
973
        {
974
            "ocid": "6",
975
            "id": "7a",
976
            "testA": "8a",
977
            "test": {
978
                "id": "9a",
979
                "C": "10a"
980
            }
981
        }
982
    ]
983
}"""
984
    )
985

986

987
def test_unflatten_empty(tmpdir):
    """A CSV whose data rows are all empty unflattens to an empty "main" list."""
    input_dir = tmpdir.ensure("release_input", dir=True)
    main_csv = input_dir.join("main.csv")
    main_csv.write_text("ocid,id\n,\n,\n,", encoding="utf8")
    output_path = tmpdir.join("release.json")

    unflatten(
        input_dir.strpath,
        input_format="csv",
        output_name=output_path.strpath,
        main_sheet_name="main",
    )

    expected = """{
        "main": []
    }"""
    actual = tmpdir.join("release.json").read()
    assert lines_strip_whitespace(actual) == lines_strip_whitespace(expected)
1003

1004

1005
def test_unflatten_csv_utf8(tmpdir):
    """UTF-8 CSV input is read without an explicit encoding and written as real UTF-8."""
    input_dir = tmpdir.ensure("release_input", dir=True)
    main_csv = input_dir.join("main.csv")
    main_csv.write_text("ocid,id\n1,éαГ😼𝒞人\n", encoding="utf8")
    output_path = tmpdir.join("release.json")

    # No ``encoding`` argument is passed: it should default to utf8
    unflatten(
        input_dir.strpath,
        input_format="csv",
        output_name=output_path.strpath,
        main_sheet_name="main",
    )

    reloaded_json = json.load(tmpdir.join("release.json"))
    assert reloaded_json == {"main": [{"ocid": "1", "id": "éαГ😼𝒞人"}]}
    # The JSON we output should be UTF-8, rather than escaped ASCII
    # https://github.com/OpenDataServices/flatten-tool/issues/71
    raw_output = tmpdir.join("release.json").read_text(encoding="utf-8")
    assert "éαГ😼𝒞人" in raw_output
×
1020

1021

1022
def test_unflatten_csv_latin1(tmpdir):
    """Latin-1 CSV input is decoded correctly when ``encoding="latin1"`` is given."""
    input_dir = tmpdir.ensure("release_input", dir=True)
    main_csv = input_dir.join("main.csv")
    main_csv.write_text("ocid,id\n1,é\n", encoding="latin1")
    output_path = tmpdir.join("release.json")

    unflatten(
        input_dir.strpath,
        input_format="csv",
        encoding="latin1",
        output_name=output_path.strpath,
        main_sheet_name="main",
    )

    reloaded_json = json.load(tmpdir.join("release.json"))
    expected_json = {"main": [{"ocid": "1", "id": "é"}]}
    assert reloaded_json == expected_json
×
1034

1035

1036
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_unflatten_unicode(tmpdir, input_format):
    """Non-ASCII cell values from the spreadsheet fixtures survive unflattening."""
    fixture_path = "flattentool/tests/fixtures/{}/unicode.{}".format(
        input_format, input_format
    )
    output_path = tmpdir.join("release.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=output_path.strpath,
        main_sheet_name="main",
    )

    reloaded_json = json.load(tmpdir.join("release.json"))
    expected_json = {"main": [{"ocid": 1, "id": "éαГ😼𝒞人"}]}
    assert reloaded_json == expected_json
×
1046

1047

1048
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_metatab(tmpdir, input_format):
    """Unflatten the ``basic_meta`` fixture with a vertical "Meta" tab.

    Checks the unflattened JSON, the cell source map and the heading source
    map written by ``unflatten``.
    """
    schema_path = tmpdir.join("metatab_schema.json")
    schema_path.write('{"properties": {}}')

    fixture_path = "flattentool/tests/fixtures/{}/basic_meta.{}".format(
        input_format, input_format
    )
    output_path = tmpdir.join("meta_unflattened.json")
    cell_map_path = tmpdir.join("meta_cell_source_map.json")
    heading_map_path = tmpdir.join("meta_heading_source_map.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=output_path.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
        metatab_schema=schema_path.strpath,
        cell_source_map=cell_map_path.strpath,
        heading_source_map=heading_map_path.strpath,
    )

    expected_json = {
        "a": "a1",
        "b": "b1",
        "c": "c1",
        "main": [
            {"colA": "cell1", "colB": "cell2"},
            {"colA": "cell3", "colB": "cell4"},
            {"colC": "cell5", "colD": "cell6"},
            {"colC": "cell7", "colD": "cell8"},
        ],
    }
    metatab_json = json.load(tmpdir.join("meta_unflattened.json"))
    assert metatab_json == expected_json

    expected_cell_source_map = {
        "": [["Meta", 2]],
        "a": [["Meta", "1", 2, "a"]],
        "b": [["Meta", "2", 2, "b"]],
        "c": [["Meta", "3", 2, "c"]],
        "main/0": [["main", 2]],
        "main/0/colA": [["main", "A", 2, "colA"]],
        "main/0/colB": [["main", "B", 2, "colB"]],
        "main/1": [["main", 3]],
        "main/1/colA": [["main", "A", 3, "colA"]],
        "main/1/colB": [["main", "B", 3, "colB"]],
        "main/2": [["subsheet", 2]],
        "main/2/colC": [["subsheet", "A", 2, "colC"]],
        "main/2/colD": [["subsheet", "B", 2, "colD"]],
        "main/3": [["subsheet", 3]],
        "main/3/colC": [["subsheet", "A", 3, "colC"]],
        "main/3/colD": [["subsheet", "B", 3, "colD"]],
    }
    cell_source_map = json.load(tmpdir.join("meta_cell_source_map.json"))
    assert cell_source_map == expected_cell_source_map

    expected_heading_source_map = {
        "a": [["Meta", "a"]],
        "b": [["Meta", "b"]],
        "c": [["Meta", "c"]],
        "main/colA": [["main", "colA"]],
        "main/colB": [["main", "colB"]],
        "main/colC": [["subsheet", "colC"]],
        "main/colD": [["subsheet", "colD"]],
    }
    heading_source_map = json.load(tmpdir.join("meta_heading_source_map.json"))
    assert heading_source_map == expected_heading_source_map
1111

1112

1113
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_metatab_only(tmpdir, input_format):
    """
    Unflatten the basic_meta fixture with metatab_only=True and check that
    the output and both source maps contain only the "Meta" sheet values.
    """
    output_file = tmpdir.join("meta_unflattened.json")
    cell_map_file = tmpdir.join("meta_cell_source_map.json")
    heading_map_file = tmpdir.join("meta_heading_source_map.json")
    fixture = "flattentool/tests/fixtures/{}/basic_meta.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
        metatab_only=True,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    # Unflattened data: just the three meta values, no "main" list.
    metatab_json = json.loads(output_file.read())
    assert metatab_json == {"a": "a1", "b": "b1", "c": "c1"}

    # Cell source map: one row entry for the root plus one cell per key.
    cell_source_map = json.loads(cell_map_file.read())
    expected_cells = {
        "": [["Meta", 2]],
        "a": [["Meta", "1", 2, "a"]],
        "b": [["Meta", "2", 2, "b"]],
        "c": [["Meta", "3", 2, "c"]],
    }
    assert cell_source_map == expected_cells

    # Heading source map: one heading per meta key.
    heading_source_map = json.loads(heading_map_file.read())
    expected_headings = {
        "a": [["Meta", "a"]],
        "b": [["Meta", "b"]],
        "c": [["Meta", "c"]],
    }
    assert heading_source_map == expected_headings
1149

1150

1151
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_metatab_with_base(tmpdir, input_format):
    """
    Passing both a metatab name and a base_json file to unflatten is
    expected to raise.
    """
    base_file = tmpdir.join("base_json.json")
    base_file.write("{}")

    fixture = "flattentool/tests/fixtures/{}/basic_meta.{}".format(
        input_format, input_format
    )
    output_path = tmpdir.join("meta_unflattened.json").strpath

    with pytest.raises(Exception):
        unflatten(
            fixture,
            input_format=input_format,
            output_name=output_path,
            metatab_name="Meta",
            metatab_vertical_orientation=True,
            base_json=base_file.strpath,
        )
1166

1167

1168
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_bad_format(tmpdir, input_format):
    """
    unflatten should raise when input_format is an unrecognised name
    ("what") and when it is None.
    """
    fixture = "flattentool/tests/fixtures/{}/basic_meta.{}".format(
        input_format, input_format
    )
    output_path = tmpdir.join("meta_unflattened.json").strpath

    # Same call twice; only the (invalid) input_format differs.
    for bad_format in ("what", None):
        with pytest.raises(Exception):
            unflatten(
                fixture,
                input_format=bad_format,
                output_name=output_path,
            )
1187

1188

1189
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_single_sheet_spreadsheet(tmpdir, input_format):
    """
    Unflatten the commands_in_file spreadsheet fixture and check that a
    single "main" row is produced.
    """
    output_file = tmpdir.join("command_single_unflattened.json")
    fixture = "flattentool/tests/fixtures/{}/commands_in_file.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=tmpdir.join("command_single_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "command_single_heading_source_map.json"
        ).strpath,
    )

    unflattened = json.loads(output_file.read())
    expected = {"main": [{"actual": "actual", "headings": "data", "some": "some"}]}
    assert unflattened == expected
1209

1210

1211
def test_commands_single_sheet_csv(tmpdir):
    """
    Unflatten the csv commands_in_file fixture and check that a single
    "main" row is produced.
    """
    output_file = tmpdir.join("command_single_unflattened.json")
    cell_map_path = tmpdir.join("command_single_source_map.json").strpath
    heading_map_path = tmpdir.join("command_single_heading_source_map.json").strpath

    unflatten(
        "flattentool/tests/fixtures/csv/commands_in_file",
        input_format="csv",
        output_name=output_file.strpath,
        cell_source_map=cell_map_path,
        heading_source_map=heading_map_path,
    )

    unflattened = json.loads(output_file.read())
    expected = {"main": [{"actual": "actual", "headings": "data", "some": "some"}]}
    assert unflattened == expected
1225

1226

1227
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_metatab(tmpdir, input_format):
    """
    Unflatten the commands_in_metatab fixture with a vertical "Meta" sheet
    and check the combined main rows and meta values.
    """
    output_file = tmpdir.join("command_metatab_unflattened.json")
    fixture = "flattentool/tests/fixtures/{}/commands_in_metatab.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=tmpdir.join("command_metatab_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "command_metatab_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.loads(output_file.read())

    expected_rows = [
        {"actual": "actual", "headings": "data", "some": "some"},
        {"actual": "actual", "headings": "Other data", "some": "some"},
    ]
    assert unflattened == {
        "main": expected_rows,
        "some": "data",
        "anumber": 2,
    }
1254

1255

1256
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_single_sheet_default(tmpdir, input_format):
    """
    Unflatten the commands_defaulted fixture twice with different
    default_configuration strings and check the output of each run.
    """
    output_file = tmpdir.join("command_single_unflattened.json")
    fixture = "flattentool/tests/fixtures/{}/commands_defaulted.{}".format(
        input_format, input_format
    )
    # Keyword arguments shared by both runs; only default_configuration varies.
    shared_kwargs = dict(
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=tmpdir.join("command_single_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "command_single_heading_source_map.json"
        ).strpath,
    )

    # First run: "SkipRows 1, headerrows 2" yields a single data row.
    unflatten(fixture, default_configuration="SkipRows 1, headerrows 2", **shared_kwargs)

    unflattened = json.loads(output_file.read())
    assert unflattened == {
        "main": [{"actual": "actual", "headings": "data", "some": "some"}]
    }

    # Second run: "SkipRows 1" alone yields two rows.
    unflatten(fixture, default_configuration="SkipRows 1", **shared_kwargs)

    unflattened = json.loads(output_file.read())
    assert unflattened == {
        "main": [
            {"actual": "other", "headings": "headings", "some": "some"},
            {"actual": "actual", "headings": "data", "some": "some"},
        ]
    }
1299

1300

1301
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_default_override(tmpdir, input_format):
    """
    Unflatten commands_in_metatab_defaulted with a default_configuration of
    "headerrows 2" and check the resulting rows and meta value.
    """
    output_file = tmpdir.join("command_metatab_unflattened.json")
    fixture = "flattentool/tests/fixtures/{}/commands_in_metatab_defaulted.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=tmpdir.join("command_metatab_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "command_metatab_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
        default_configuration="headerrows 2",
    )

    unflattened = json.loads(output_file.read())

    # Both 'headerrows 2' (the default) and 'skiprows 1' (defined in the
    # metatab) should apply, because only individual commands are overridden
    # rather than the whole set. The result therefore matches what
    # commands_in_metatab.xlsx gives, where every command lives in the metatab.
    expected_rows = [
        {"actual": "actual", "headings": "data", "some": "some"},
        {"actual": "actual", "headings": "Other data", "some": "some"},
    ]
    assert unflattened == {
        "main": expected_rows,
        "some": "data",
    }
1332

1333

1334
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_ignore(tmpdir, input_format):
    """
    Unflatten the commands_ignore fixture and check that a single "main"
    row is produced.
    """
    output_file = tmpdir.join("command_single_unflattened.json")
    fixture = "flattentool/tests/fixtures/{}/commands_ignore.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=tmpdir.join("command_single_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "command_single_heading_source_map.json"
        ).strpath,
    )

    unflattened = json.loads(output_file.read())
    expected = {"main": [{"actual": "actual", "headings": "data", "some": "some"}]}
    assert unflattened == expected
1354

1355

1356
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_hashcomments(tmpdir, input_format):
    """
    Unflatten the commands_hashcomments fixture with a vertical "Meta"
    sheet and check the resulting rows and meta value.
    """
    output_file = tmpdir.join("commands_hashcomments_unflattened.json")
    fixture = "flattentool/tests/fixtures/{}/commands_hashcomments.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=tmpdir.join("commands_hashcomments_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "commands_hashcomments_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.loads(output_file.read())

    expected_rows = [
        {"actual": "actual", "headings": "data", "some": "some"},
        {"actual": "actual", "headings": "Other data", "some": "some"},
    ]
    assert unflattened == {
        "main": expected_rows,
        "some": "data",
    }
1382

1383

1384
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_hashcomments_sourcemap(tmpdir, input_format):
    """
    Unflatten the commands_hashcomments_sourcemap fixture and check both
    the output data and the column letters recorded in the cell source map.
    """
    output_file = tmpdir.join("commands_hashcomments_unflattened.json")
    cell_map_file = tmpdir.join("commands_hashcomments_source_map.json")
    fixture = "flattentool/tests/fixtures/{}/commands_hashcomments_sourcemap.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=tmpdir.join(
            "commands_hashcomments_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.loads(output_file.read())
    cell_source_map = json.loads(cell_map_file.read())

    assert unflattened == {
        "publishedDate": "2019-06-20T00:00:00Z",
        "publisher": {"name": "Open Data Services Co-operative"},
        "uri": "http://www.example.com",
        "version": "1.1",
        "main": [{"date": "2010-03-15T09:30:00Z", "id": "Ocds-1"}],
    }

    # Index [0][1] of each entry is the column letter of the source cell.
    date_column = cell_source_map["main/0/date"][0][1]
    assert date_column == "E"
    id_column = cell_source_map["main/0/id"][0][1]
    assert id_column == "C"
1415

1416

1417
@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
def test_commands_id_name(tmpdir, input_format):
    """
    Unflatten the commands_id_name fixture with a vertical "Meta" sheet and
    check the nested "someroot" / "someArray" structure of the output.
    """
    output_file = tmpdir.join("commands_id_name_unflattened.json")
    fixture = "flattentool/tests/fixtures/{}/commands_id_name.{}".format(
        input_format, input_format
    )

    unflatten(
        fixture,
        input_format=input_format,
        output_name=output_file.strpath,
        cell_source_map=tmpdir.join("commands_id_name_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "commands_id_name_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.loads(output_file.read())

    expected_array = [
        {"heading1": "more data", "heading2": "other data"},
        {"heading1": "more more data", "heading2": "more other data"},
    ]
    expected_root_item = {
        "actual": "actual",
        "headings": "data",
        "someId": "some",
        "someArray": expected_array,
    }
    assert unflattened == {
        "someroot": [expected_root_item],
        "some": "data",
    }
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc