• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 6245535001

20 Sep 2023 07:44AM UTC coverage: 57.013%. First build
6245535001

push

github

web-flow
8.1.0 Release (#1628)

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

Bumps [Newtonsoft.Json](https://github.com/JamesNK/Newtonsoft.Json) from 13.0.1 to 13.0.2.
- [Release notes](https://github.com/JamesNK/Newtonsoft.Json/releases)
- [Commits](https://github.com/JamesNK/Newtonsoft.Json/compare/13.0.1...13.0.2)

---
updated-dependencies:
- dependency-name: Newtonsoft.Json
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

Bumps [NLog](https://github.com/NLog/NLog) from 5.0.5 to 5.1.0.
- [Release notes](https://github.com/NLog/NLog/releases)
- [Changelog](https://github.com/NLog/NLog/blob/dev/CHANGELOG.md)
- [Commits](https://github.com/NLog/NLog/compare/v5.0.5...v5.1.0)

---
updated-dependencies:
- dependency-name: NLog
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

* Fix -r flag - should have been --results-directory all along

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

* Bump YamlDotNet from 12.0.2 to 12.1.0

Bumps [YamlDotNet](https://github.com/aaubry/YamlDotNet) from 12.0.2 to 12.1.0.
- [Release notes](https://github.com/aaubry/YamlDotNet/releases)
- [Commits](https://github.com/aaubry/YamlDotNet/compare/v12.0.2...v12.1.0)

---
updated-dependencies:
- dependency-name: YamlDotNet
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Moq from 4.18.2 to 4.18.3

Bumps [Moq](https://github.com/moq/moq4) from 4.18.2 to 4.18.3.
- [Release notes](https://github.com/moq/moq4/releases)
- [Changelog](https://github.com/moq/moq4/blob/main/CHANGELOG.md)
- [Commits](https://github.com/moq/moq4/compare/v4.18.2...v4.18.3)

---
updated-dependencies:
- dependency-name: Moq
... (continued)

10732 of 20257 branches covered (0.0%)

Branch coverage included in aggregate %.

48141 of 48141 new or added lines in 1086 files covered. (100.0%)

30685 of 52388 relevant lines covered (58.57%)

7387.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.27
/Rdmp.Core/DataLoad/Modules/DataProvider/FlatFileManipulation/ExcelToCSVFilesConverter.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System.IO;
8
using System.Text;
9
using System.Text.RegularExpressions;
10
using FAnsi.Discovery;
11
using FAnsi.Implementations.MicrosoftSQL;
12
using NPOI.HSSF.UserModel;
13
using NPOI.SS.UserModel;
14
using NPOI.XSSF.UserModel;
15
using Rdmp.Core.Curation;
16
using Rdmp.Core.Curation.Data;
17
using Rdmp.Core.DataFlowPipeline;
18
using Rdmp.Core.DataFlowPipeline.Requirements;
19
using Rdmp.Core.DataLoad.Engine.DataProvider;
20
using Rdmp.Core.DataLoad.Engine.Job;
21
using Rdmp.Core.DataLoad.Modules.DataFlowSources;
22
using Rdmp.Core.ReusableLibraryCode.Checks;
23
using Rdmp.Core.ReusableLibraryCode.Extensions;
24
using Rdmp.Core.ReusableLibraryCode.Progress;
25

26
namespace Rdmp.Core.DataLoad.Modules.DataProvider.FlatFileManipulation;
27

28
/// <summary>
/// DLE component which converts Microsoft Excel Workbooks into CSV files.  Workbooks can have multiple worksheets in which case 1 csv will be created for
/// each worksheet.  Supports both .xls and .xlsx by using NPOI (i.e. not Interop).
/// </summary>
public class ExcelToCSVFilesConverter : IPluginDataProvider
{
    /// <summary>
    /// Search pattern passed to <c>GetFiles</c> on the job's ForLoading directory to pick the workbooks to convert.
    /// </summary>
    [DemandsInitialization("Pattern to match Excel files in forLoading directory", Mandatory = true)]
    public string ExcelFilePattern { get; set; }

    /// <summary>
    /// Optional filter on worksheet names.  When null every worksheet is converted; otherwise only worksheets
    /// whose name matches the expression are converted and the rest are reported as ignored.
    /// </summary>
    [DemandsInitialization(
        "Optional,if populated will only extract sheets that match the pattern e.g. '.*data$' will only extract worksheets whose names end with data")]
    public Regex WorksheetPattern { get; set; }

    /// <summary>
    /// When true the output file name is '{workbook name without extension}_{worksheet name}' instead of just
    /// the worksheet name.
    /// </summary>
    [DemandsInitialization(
        "Normally a workbook called 'mywb.xlsx' with 2 worksheets 'sheet1' and 'sheet2' will produce csv files called 'sheet1.csv' and 'sheet2.csv'.  Setting this to true will add the workbook name as a prefix 'mywb_sheet1.csv' and 'mywb_sheet2.csv'",
        defaultValue: false)]
    public bool PrefixWithWorkbookName { get; set; }

    /// <summary>
    /// Does nothing; this component has no cleanup to perform.
    /// </summary>
    /// <param name="exitCode">Unused.</param>
    /// <param name="postLoadEventsListener">Unused.</param>
    public void LoadCompletedSoDispose(ExitCodeType exitCode, IDataLoadEventListener postLoadEventsListener)
    {
    }

    /// <summary>
    /// Reports a <see cref="CheckResult.Fail"/> if <see cref="ExcelFilePattern"/> is null, empty or whitespace.
    /// </summary>
    /// <param name="notifier">Receives the failure message, if any.</param>
    public void Check(ICheckNotifier notifier)
    {
        if (string.IsNullOrWhiteSpace(ExcelFilePattern))
            notifier.OnCheckPerformed(new CheckEventArgs("Argument ExcelFilePattern has not been specified",
                CheckResult.Fail));
    }

    /// <summary>
    /// Does nothing; all state needed by <see cref="Fetch"/> is taken from the job at fetch time.
    /// </summary>
    /// <param name="directory">Unused.</param>
    /// <param name="dbInfo">Unused.</param>
    public void Initialize(ILoadDirectory directory, DiscoveredDatabase dbInfo)
    {
    }

    /// <summary>
    /// Converts every file in the job's ForLoading directory that matches <see cref="ExcelFilePattern"/> into
    /// one CSV file per (matching) worksheet.  A warning is raised on the job if no file matched the pattern.
    /// </summary>
    /// <param name="job">Supplies the load directory and receives progress notifications.</param>
    /// <param name="cancellationToken">Not consulted by this implementation.</param>
    /// <returns>Always <see cref="ExitCodeType.Success"/>; finding no files is a warning, not a failure.</returns>
    public ExitCodeType Fetch(IDataLoadJob job, GracefulCancellationToken cancellationToken)
    {
        var foundAtLeastOne = false;

        foreach (var f in job.LoadDirectory.ForLoading.GetFiles(ExcelFilePattern))
        {
            foundAtLeastOne = true;
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"About to process file {f.Name}"));
            ProcessFile(f, job);
        }

        if (!foundAtLeastOne)
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                $"Did not find any files matching Pattern '{ExcelFilePattern}' in directory '{job.LoadDirectory.ForLoading.FullName}'"));

        return ExitCodeType.Success;
    }

    /// <summary>
    /// Opens a single workbook and writes each worksheet accepted by <see cref="IsWorksheetNameMatch"/> to a
    /// CSV file in the job's ForLoading directory.
    /// </summary>
    /// <param name="fileInfo">The workbook to convert; '.xls' (any casing) is read as HSSF, anything else as XSSF.</param>
    /// <param name="job">Supplies the output directory and receives progress notifications.</param>
    private void ProcessFile(FileInfo fileInfo, IDataLoadJob job)
    {
        // The workbook is only ever read, so open it read-only: FileStream(path, FileMode.Open) would request
        // read/write access and fail on read-only files.
        using var fs = fileInfo.OpenRead();

        // Compare the extension case-insensitively so that e.g. 'REPORT.XLS' is not handed to the .xlsx reader.
        var isLegacyFormat =
            string.Equals(fileInfo.Extension, ".xls", System.StringComparison.OrdinalIgnoreCase);

        IWorkbook wb;
        if (isLegacyFormat)
            wb = new HSSFWorkbook(fs);
        else
            wb = new XSSFWorkbook(fs);

        try
        {
            var source = new ExcelDataFlowSource();
            source.PreInitialize(new FlatFileToLoad(fileInfo), job);

            for (var i = 0; i < wb.NumberOfSheets; i++)
            {
                var sheet = wb.GetSheetAt(i);

                if (!IsWorksheetNameMatch(sheet.SheetName))
                {
                    job.OnNotify(this,
                        new NotifyEventArgs(ProgressEventType.Information,
                            $"Ignoring worksheet:{sheet.SheetName}"));
                    continue;
                }

                job.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Information,
                        $"Started processing worksheet:{sheet.SheetName}"));

                var newName = PrefixWithWorkbookName
                    ? $"{Path.GetFileNameWithoutExtension(fileInfo.FullName)}_{sheet.SheetName}"
                    : sheet.SheetName;

                //make it sensible
                newName =
                    $"{MicrosoftQuerySyntaxHelper.Instance.GetSensibleEntityNameFromString(newName)}.csv";

                var savePath = Path.Combine(job.LoadDirectory.ForLoading.FullName, newName);
                var dt = source.GetAllData(sheet, job);
                dt.EndLoadData();

                // Scope the writer so its buffer is flushed and the file closed BEFORE success is reported;
                // a write failure then surfaces as an exception rather than after a "Saved" message.
                using (var saveStream = new StreamWriter(savePath, false, Encoding.UTF8, 1 << 20))
                {
                    dt.SaveAsCsv(saveStream);
                }

                job.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Information, $"Saved worksheet as {newName}"));
            }
        }
        finally
        {
            wb.Close();
        }
    }

    /// <summary>
    /// Decides whether a worksheet should be converted.
    /// </summary>
    /// <param name="name">The worksheet name to test.</param>
    /// <returns>
    /// True when <see cref="WorksheetPattern"/> is null (no filter) or when it matches <paramref name="name"/>.
    /// </returns>
    private bool IsWorksheetNameMatch(string name) => WorksheetPattern is null || WorksheetPattern.IsMatch(name);
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc