• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 12750796701

13 Jan 2025 03:42PM UTC coverage: 57.438% (-0.01%) from 57.448%
12750796701

push

github

web-flow
Merge pull request #2109 from HicServices/bugfix/rdmp-152-simplefileextractor

fix simple file extractor check error

11298 of 21223 branches covered (53.23%)

Branch coverage included in aggregate %.

32145 of 54411 relevant lines covered (59.08%)

17155.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.33
/Rdmp.Core/DataExport/DataExtraction/Pipeline/SimpleFileExtractor.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System;
8
using System.Collections.Generic;
9
using System.Data;
10
using System.IO;
11
using System.Linq;
12
using Rdmp.Core.Curation.Data;
13
using Rdmp.Core.DataExport.DataExtraction.Commands;
14
using Rdmp.Core.DataFlowPipeline;
15
using Rdmp.Core.ReusableLibraryCode;
16
using Rdmp.Core.ReusableLibraryCode.Checks;
17
using Rdmp.Core.ReusableLibraryCode.Progress;
18

19
namespace Rdmp.Core.DataExport.DataExtraction.Pipeline;
20

21
/// <summary>
22
/// <para>
23
/// Component for copying directory trees or top level files from a location on disk to the output directory
24
/// of a project extraction.  Supports substituting private identifiers for release identifiers in top level
25
/// file/directory names.
26
/// </para>
27
/// <para>IMPORTANT: File extractor operates as part of the 'Extract Globals' section of the extraction pipeline.
28
/// This means that you must enable globals in the extraction for the component to operate.</para>
29
/// </summary>
30
public class SimpleFileExtractor : FileExtractor
31
{
32
    /// <summary>
    /// Directory on disk holding the files (or directories) that are copied to the output directory.
    /// </summary>
    [DemandsInitialization(
        "Location of files on disk that should be copied to the output directory",
        Mandatory = true)]
    public DirectoryInfo LocationOfFiles { get; set; }

    /// <summary>
    /// True to copy directories found in <see cref="LocationOfFiles"/>, false to copy files.
    /// </summary>
    [DemandsInitialization(
        "True if the LocationOfFiles contains a number of directories to be copied.  False if it contains files only (no subdirectories)",
        Mandatory = true,
        DefaultValue = true)]
    public bool Directories { get; set; }

    /// <summary>
    /// True to copy once per cohort member (<see cref="Pattern"/> must then contain $p), false to copy once only.
    /// </summary>
    [DemandsInitialization(
        "True if there is 1 or more files/folders per patient (if so Pattern must contain $p).  False if there is one arbitrary file/folder that needs copied once only",
        Mandatory = true,
        DefaultValue = true)]
    public bool PerPatient { get; set; }

    /// <summary>
    /// Name pattern of the files/directories to copy.  Defaults to "$p".
    /// </summary>
    [DemandsInitialization(
        "Expected naming pattern of files to be moved.  If PerPatient is true then this should include the symbol $p to indicate the private identifier value of each patient to be moved e.g. $p.txt.  This symbol will be replaced in the file/path names (but not file body)",
        Mandatory = true)]
    public string Pattern { get; set; } = "$p";

    /// <summary>
    /// Destination directory, optionally containing the tokens $p, $n and $c which are
    /// resolved by <see cref="GetDestinationDirectory"/>.
    /// </summary>
    [DemandsInitialization(@"Directory where files should be put 
$p - Project Extraction Directory (e.g. c:\MyProject\)
$n - Project Number (e.g. 234)
$c - Configuration Extraction Directory  (e.g. c:\MyProject\Extractions\Extr_16)
", Mandatory = true, DefaultValue = "$c\\Files\\")]
    public string OutputDirectoryName { get; set; } = "$c\\Files\\";

    /// <summary>
    /// Passed to File.Copy as its overwrite flag whenever a file is copied.  Defaults to true.
    /// </summary>
    [DemandsInitialization(
        "Determines behaviour when the destination file already exists either due to an old run or cohort private identifier aliases.  Set to true to overwrite or false to crash.",
        DefaultValue = true)]
    public bool Overwrite { get; set; } = true;
61

62
    /// <summary>
    /// Runs the base class checks and then verifies that <see cref="Pattern"/> agrees with
    /// <see cref="PerPatient"/>: the $p token has to be present in per patient mode and absent
    /// otherwise.  A mismatch is reported to <paramref name="notifier"/> as a failed check.
    /// </summary>
    /// <param name="notifier">Receives the failure message when the settings disagree</param>
    public override void Check(ICheckNotifier notifier)
    {
        base.Check(notifier);

        var hasPatientToken = Pattern.Contains("$p");

        // Settings are consistent exactly when the token is present in per patient mode only
        if (PerPatient == hasPatientToken)
        {
            return;
        }

        var problem = PerPatient
            ? $"PerPatient is true but Pattern {Pattern} did not contain token $p"
            : $"PerPatient is false but Pattern {Pattern} contains token $p.  This token will never be matched in MoveAll mode";

        notifier.OnCheckPerformed(new CheckEventArgs(problem, CheckResult.Fail));
    }
76

77
    /// <summary>
    /// Copies content of <see cref="LocationOfFiles"/> into the directory returned by
    /// <see cref="GetDestinationDirectory"/>, creating that directory first when it is missing.
    /// In <see cref="PerPatient"/> mode <see cref="MovePatient"/> is called for every row of the
    /// cohort, otherwise <see cref="MoveAll"/> is called once.
    /// </summary>
    /// <param name="command">Supplies the configuration whose cohort is read in per patient mode</param>
    /// <param name="listener">Handed on to the copy routines</param>
    /// <param name="cancellationToken">Handed on to the copy routines</param>
    /// <exception cref="Exception">Thrown when <see cref="LocationOfFiles"/> does not exist</exception>
    protected override void MoveFiles(ExtractGlobalsCommand command, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        if (!LocationOfFiles.Exists)
        {
            throw new Exception($"LocationOfFiles {LocationOfFiles} did not exist");
        }

        var target = GetDestinationDirectory();

        if (!target.Exists)
        {
            target.Create();
        }

        if (!PerPatient)
        {
            MoveAll(target, listener, cancellationToken);
            return;
        }

        var extractionCohort = command.Configuration.Cohort;
        var cohortTable = extractionCohort.FetchEntireCohort();

        var privateColumn = extractionCohort.GetPrivateIdentifier(true);
        var releaseColumn = extractionCohort.GetReleaseIdentifier(true);

        // One copy operation per cohort row, substituting private for release identifier
        foreach (DataRow patient in cohortTable.Rows)
        {
            MovePatient(patient[privateColumn], patient[releaseColumn], target, listener, cancellationToken);
        }
    }
104

105
    /// <summary>
    /// Resolves tokens (if any) in <see cref="OutputDirectoryName"/> into a single path.  Tokens are
    /// substituted in the order $p (project extraction directory), $n (project number) and
    /// $c (extraction directory of the configuration).  A replacement value is only computed
    /// when its token is actually present.
    /// </summary>
    /// <returns>A <see cref="DirectoryInfo"/> for the resolved path</returns>
    public DirectoryInfo GetDestinationDirectory()
    {
        var resolved = OutputDirectoryName;

        if (resolved.Contains("$p"))
        {
            resolved = resolved.Replace("$p", _command.Project.ExtractionDirectory);
        }

        if (resolved.Contains("$n"))
        {
            var projectNumber = _command.Project.ProjectNumber.ToString();
            resolved = resolved.Replace("$n", projectNumber);
        }

        if (resolved.Contains("$c"))
        {
            var configurationDirectory =
                new ExtractionDirectory(_command.Project.ExtractionDirectory, _command.Configuration);

            resolved = resolved.Replace("$c", configurationDirectory.ExtractionDirectoryInfo.FullName);
        }

        return new DirectoryInfo(resolved);
    }
123

124
    /// <summary>
    /// Called when <see cref="PerPatient"/> is false.  Called once per extraction.  Copies the top
    /// level entries of <see cref="LocationOfFiles"/> that match <see cref="Pattern"/> into
    /// <paramref name="destinationDirectory"/>: directories (recursively) when
    /// <see cref="Directories"/> is true, files otherwise.
    /// </summary>
    /// <remarks>
    /// A <see cref="Pattern"/> containing either search wildcard ('*' or '?') is used as a search
    /// pattern.  Any other value is treated as an exact, case insensitive, file or directory name.
    /// </remarks>
    /// <param name="destinationDirectory">Directory that matching entries are copied into</param>
    /// <param name="listener">Told about each copy, and warned when nothing matched</param>
    /// <param name="cancellationToken">Not consulted by this implementation</param>
    public virtual void MoveAll(DirectoryInfo destinationDirectory, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        var matches = new List<FileSystemInfo>();

        // Both '*' and '?' are wildcards in .NET search patterns; testing only for '*' meant that
        // a pattern such as "scan_?.dat" was compared as a literal name and never matched
        if (Pattern.Contains('*') || Pattern.Contains('?'))
        {
            // Materialised up front so copying cannot influence the enumeration
            matches.AddRange(LocationOfFiles.EnumerateFileSystemInfos(Pattern));
        }
        else
        {
            var namedFile = LocationOfFiles.GetFiles()
                .FirstOrDefault(candidate => candidate.Name.Equals(Pattern, StringComparison.OrdinalIgnoreCase));

            if (namedFile != null)
                matches.Add(namedFile);

            var namedDirectory = LocationOfFiles.GetDirectories()
                .FirstOrDefault(candidate => candidate.Name.Equals(Pattern, StringComparison.OrdinalIgnoreCase));

            if (namedDirectory != null)
                matches.Add(namedDirectory);
        }

        var copiedAny = false;

        foreach (var match in matches)
        {
            if (Directories && match is DirectoryInfo sourceDirectory)
            {
                var dest = Path.Combine(destinationDirectory.FullName, sourceDirectory.Name);

                // Recursively copy all files from input path to destination path
                listener.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Information,
                        $"Copying directory '{sourceDirectory.FullName}' to '{dest}'"));
                CopyFolder(sourceDirectory.FullName, dest);
                copiedAny = true;
            }
            else if (!Directories && match is FileInfo sourceFile)
            {
                var dest = Path.Combine(destinationDirectory.FullName, sourceFile.Name);

                listener.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Information,
                        $"Copying file '{sourceFile.FullName}' to '{dest}'"));
                File.Copy(sourceFile.FullName, dest, Overwrite);
                copiedAny = true;
            }
        }

        if (!copiedAny)
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Warning,
                    $"No {(Directories ? "Directories" : "Files")} were found matching Pattern {Pattern} in {LocationOfFiles.FullName}"));
    }
182

183
    /// <summary>
    /// Called when <see cref="PerPatient"/> is true.  Called once per private identifier.  Note that it is
    /// possible for 2 private identifiers to map to the same release identifier - be careful.
    /// Copies every top level entry of <see cref="LocationOfFiles"/> matching <see cref="Pattern"/>
    /// (with $p replaced by the private identifier) into <paramref name="destinationDirectory"/>,
    /// writing the release identifier into the copied file/directory name in place of the private one.
    /// </summary>
    /// <remarks>
    /// The substitution in the name ignores case.  File system enumeration is case insensitive on
    /// some platforms, so an entry can match while spelling the private identifier with different
    /// casing; a case sensitive replace would leave the private identifier in the released name.
    /// </remarks>
    /// <param name="privateIdentifier">Private identifier value from the cohort; null/blank values are skipped with a warning</param>
    /// <param name="releaseIdentifier">Release identifier value from the cohort; null/blank values are reported as an error and skipped</param>
    /// <param name="destinationDirectory">Directory that matching entries are copied into</param>
    /// <param name="listener">Told about each copy and about anything skipped or not found</param>
    /// <param name="cancellationToken">Not consulted by this implementation</param>
    public virtual void MovePatient(object privateIdentifier, object releaseIdentifier,
        DirectoryInfo destinationDirectory, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        if (privateIdentifier == DBNull.Value || string.IsNullOrWhiteSpace(privateIdentifier?.ToString()))
        {
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Warning,
                    "Skipped NULL private identifier found in cohort when trying to copy files"));
            return;
        }

        if (releaseIdentifier == DBNull.Value || string.IsNullOrWhiteSpace(releaseIdentifier?.ToString()))
        {
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Error,
                    $"Found NULL release identifier in cohort when trying to copy files.  This is not allowed as it breaks file name substitutions.  Private identifier was {privateIdentifier}"));
            return;
        }

        // Guaranteed non blank by the guard above, so it is a valid value to search for in names
        var privateText = privateIdentifier.ToString();

        // What we will be writing into the file/path names in place of the private identifier
        var releaseSub = UsefulStuff.RemoveIllegalFilenameCharacters(releaseIdentifier.ToString());

        var patternAfterTokenInsertion = Pattern.Replace("$p", privateText);

        var atLeastOne = false;

        foreach (var e in LocationOfFiles.EnumerateFileSystemInfos(patternAfterTokenInsertion))
        {
            if (Directories && e is DirectoryInfo dir)
            {
                var dest = Path.Combine(
                    destinationDirectory.FullName,
                    dir.Name.Replace(privateText, releaseSub, StringComparison.OrdinalIgnoreCase));

                // Recursively copy all files from input path to destination path
                listener.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Information,
                        $"Copying directory '{e.FullName}' to '{dest}'"));
                CopyFolder(e.FullName, dest);
                atLeastOne = true;
            }
            else if (!Directories && e is FileInfo f)
            {
                var dest = Path.Combine(
                    destinationDirectory.FullName,
                    f.Name.Replace(privateText, releaseSub, StringComparison.OrdinalIgnoreCase));

                listener.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Information, $"Copying file '{f.FullName}' to '{dest}'"));
                File.Copy(f.FullName, dest, Overwrite);
                atLeastOne = true;
            }
        }

        if (!atLeastOne)
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Warning,
                    $"No {(Directories ? "Directories" : "Files")} were found matching Pattern {patternAfterTokenInsertion} in {LocationOfFiles.FullName}.  For private identifier '{privateIdentifier}'"));
    }
247

248
    /// <summary>
    /// Recursively copies the files and sub directories of <paramref name="sourceFolder"/> into
    /// <paramref name="destFolder"/>, creating the destination when it does not exist.
    /// <see cref="Overwrite"/> is passed to File.Copy as its overwrite flag.
    /// </summary>
    /// <param name="sourceFolder">Path of the directory to copy from</param>
    /// <param name="destFolder">Path of the directory to copy into</param>
    protected void CopyFolder(string sourceFolder, string destFolder)
    {
        if (!Directory.Exists(destFolder))
        {
            Directory.CreateDirectory(destFolder);
        }

        // Files of this level first ...
        foreach (var sourceFile in Directory.GetFiles(sourceFolder))
        {
            var targetFile = Path.Combine(destFolder, Path.GetFileName(sourceFile));
            File.Copy(sourceFile, targetFile, Overwrite);
        }

        // ... then descend into each sub directory
        foreach (var childFolder in Directory.GetDirectories(sourceFolder))
        {
            var targetFolder = Path.Combine(destFolder, Path.GetFileName(childFolder));
            CopyFolder(childFolder, targetFolder);
        }
    }
268
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc