• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 6245535001

20 Sep 2023 07:44AM UTC coverage: 57.013%. First build
6245535001

push

github

web-flow
8.1.0 Release (#1628)

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

Bumps [Newtonsoft.Json](https://github.com/JamesNK/Newtonsoft.Json) from 13.0.1 to 13.0.2.
- [Release notes](https://github.com/JamesNK/Newtonsoft.Json/releases)
- [Commits](https://github.com/JamesNK/Newtonsoft.Json/compare/13.0.1...13.0.2)

---
updated-dependencies:
- dependency-name: Newtonsoft.Json
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

Bumps [NLog](https://github.com/NLog/NLog) from 5.0.5 to 5.1.0.
- [Release notes](https://github.com/NLog/NLog/releases)
- [Changelog](https://github.com/NLog/NLog/blob/dev/CHANGELOG.md)
- [Commits](https://github.com/NLog/NLog/compare/v5.0.5...v5.1.0)

---
updated-dependencies:
- dependency-name: NLog
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

* Fix -r flag - should have been --results-directory all along

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

* Bump YamlDotNet from 12.0.2 to 12.1.0

Bumps [YamlDotNet](https://github.com/aaubry/YamlDotNet) from 12.0.2 to 12.1.0.
- [Release notes](https://github.com/aaubry/YamlDotNet/releases)
- [Commits](https://github.com/aaubry/YamlDotNet/compare/v12.0.2...v12.1.0)

---
updated-dependencies:
- dependency-name: YamlDotNet
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Moq from 4.18.2 to 4.18.3

Bumps [Moq](https://github.com/moq/moq4) from 4.18.2 to 4.18.3.
- [Release notes](https://github.com/moq/moq4/releases)
- [Changelog](https://github.com/moq/moq4/blob/main/CHANGELOG.md)
- [Commits](https://github.com/moq/moq4/compare/v4.18.2...v4.18.3)

---
updated-dependencies:
- dependency-name: Moq
... (continued)

10732 of 20257 branches covered (0.0%)

Branch coverage included in aggregate %.

48141 of 48141 new or added lines in 1086 files covered. (100.0%)

30685 of 52388 relevant lines covered (58.57%)

7387.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.91
/Rdmp.Core/DataLoad/Engine/DataProvider/FromCache/CachedFileRetriever.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System;
8
using System.Collections.Generic;
9
using System.IO;
10
using System.Linq;
11
using FAnsi.Discovery;
12
using Rdmp.Core.Caching.Layouts;
13
using Rdmp.Core.Caching.Pipeline;
14
using Rdmp.Core.Curation;
15
using Rdmp.Core.Curation.Data;
16
using Rdmp.Core.Curation.Data.Cache;
17
using Rdmp.Core.Curation.Data.DataLoad;
18
using Rdmp.Core.DataFlowPipeline;
19
using Rdmp.Core.DataLoad.Engine.Job;
20
using Rdmp.Core.DataLoad.Engine.Job.Scheduling;
21
using Rdmp.Core.ReusableLibraryCode.Checks;
22
using Rdmp.Core.ReusableLibraryCode.Progress;
23

24
namespace Rdmp.Core.DataLoad.Engine.DataProvider.FromCache;
25

26
/// <summary>
27
/// Fetches all the ILoadProgress objects in the ILoadMetadata, then selects the first scheduled task which has work to be done (e.g. data is cached but not yet loaded).
28
/// Cached data is unzipped to the forLoading directory.  The Dispose method (which should be called after the entire DataLoad has completed successfully) will clear
29
/// out the cached file(s) that were loaded and update the schedule to indicate the successful loading of data
30
/// </summary>
31
public abstract class CachedFileRetriever : ICachedDataProvider
32
{
33
    /// <summary>
    /// The load progress this retriever is configured against. <see cref="Check"/> reports a failure when this is
    /// null or when it has no CacheProgress.
    /// </summary>
    [DemandsInitialization("The LoadProgress (which must also have a CacheProgress with a valid Caching Pipeline associated with it)", mandatory: true)]
    public ILoadProgress LoadProgress { get; set; }

    /// <summary>
    /// When true, <see cref="ExtractJobs"/> runs each workload file through an archive extractor; when false the
    /// file is copied unchanged into the corresponding sub folder of ForLoading.
    /// </summary>
    [DemandsInitialization(
        "Whether to unarchive the files into the ForLoading folder, or just copy them as is")]
    public bool ExtractFilesFromArchive { get; set; }

    /// <summary>Supplied by concrete retrievers.</summary>
    /// <param name="directory">The load directory handed to the component.</param>
    /// <param name="dbInfo">The database handed to the component.</param>
    public abstract void Initialize(ILoadDirectory directory, DiscoveredDatabase dbInfo);

    /// <summary>Supplied by concrete retrievers.</summary>
    /// <param name="dataLoadJob">The job being run.</param>
    /// <param name="cancellationToken">Token passed in by the caller.</param>
    /// <returns>An <see cref="ExitCodeType"/> chosen by the implementation.</returns>
    public abstract ExitCodeType Fetch(IDataLoadJob dataLoadJob, GracefulCancellationToken cancellationToken);
43

44
    #region Events
45

46
    /// <summary>
    /// Raised by <see cref="OnCacheFileNotFound"/> when a file expected in the cache cannot be located.
    /// </summary>
    public event CacheFileNotFoundHandler CacheFileNotFound;

    /// <summary>
    /// Notifies any <see cref="CacheFileNotFound"/> subscribers; does nothing when there are none.
    /// </summary>
    /// <param name="message">Description of the file that could not be found.</param>
    /// <param name="ex">Associated exception, may be null.</param>
    protected virtual void OnCacheFileNotFound(string message, Exception ex)
    {
        var subscribers = CacheFileNotFound;
        if (subscribers is null)
            return;

        subscribers(this, message, ex);
    }
×
52

53
    #endregion
54

55
    /// <summary>
    /// Creates the cache layout for the CacheProgress attached to <paramref name="job"/>'s LoadProgress, using the
    /// job itself as the event listener.
    /// </summary>
    /// <param name="job">The scheduled job whose LoadProgress identifies the cache.</param>
    /// <exception cref="NullReferenceException">The job's LoadProgress has no CacheProgress.</exception>
    protected ICacheLayout CreateCacheLayout(ScheduledDataLoadJob job)
    {
        var progress = job.LoadProgress.CacheProgress;

        if (progress is null)
            throw new NullReferenceException("cacheProgress cannot be null");

        return CreateCacheLayout(progress, job);
    }
61

62
    /// <summary>
    /// Builds a <see cref="CachingPipelineUseCase"/> for <paramref name="cacheProgress"/>, creates only its
    /// destination component and asks that destination for its cache layout.
    /// </summary>
    /// <param name="cacheProgress">The cache progress whose caching pipeline describes the cache.</param>
    /// <param name="listener">Listener passed to the destination creation.</param>
    /// <returns>The layout reported by the pipeline destination.</returns>
    protected virtual ICacheLayout CreateCacheLayout(ICacheProgress cacheProgress, IDataLoadEventListener listener) =>
        new CachingPipelineUseCase(cacheProgress)
            .CreateDestinationOnly(listener)
            .CreateCacheLayout();
68

69
    /// <summary>
    /// Returns <paramref name="dataLoadJob"/> typed as a <see cref="ScheduledDataLoadJob"/>.
    /// </summary>
    /// <param name="dataLoadJob">The job to convert.</param>
    /// <exception cref="Exception">The job is null or is not a <see cref="ScheduledDataLoadJob"/>.</exception>
    protected static ScheduledDataLoadJob ConvertToScheduledJob(IDataLoadJob dataLoadJob)
    {
        if (dataLoadJob is ScheduledDataLoadJob scheduled)
            return scheduled;

        throw new Exception("CachedFileRetriever can only be used in conjunction with a ScheduledDataLoadJob");
    }
76

77
    /// <summary>
    /// Looks up the cache file for every date in <paramref name="job"/>'s DatesToRetrieve, stores the resulting
    /// date-to-file map in <see cref="_workload"/> and returns it.
    /// </summary>
    /// <remarks>
    /// A missing file does not stop the loop: <see cref="OnCacheFileNotFound"/> is called and the entry (which may
    /// be null) is still recorded against its date.
    /// </remarks>
    /// <param name="job">The scheduled job naming the dates to retrieve.</param>
    /// <returns>The same dictionary instance that is held in <see cref="_workload"/>.</returns>
    protected Dictionary<DateTime, FileInfo> GetDataLoadWorkload(ScheduledDataLoadJob job)
    {
        var layout = CreateCacheLayout(job);
        layout.CheckCacheFilesAvailability(job);

        // Assigned before the loop so the field refers to the dictionary while it is being filled
        _workload = new Dictionary<DateTime, FileInfo>();

        foreach (var date in job.DatesToRetrieve)
        {
            var archive = layout.GetArchiveFileInfoForDate(date, job);

            string problem = archive switch
            {
                null =>
                    $"Could not find cached file for date '{date}' for CacheLayout.ArchiveType {layout.ArchiveType} in cache at {job.LoadDirectory.Cache.FullName}",
                { Exists: false } =>
                    $"Could not find cached file '{archive.FullName}' for date {date} in cache at {job.LoadDirectory.Cache.FullName}",
                _ => null
            };

            if (problem is not null)
                OnCacheFileNotFound(problem, null);

            _workload.Add(date, archive);
        }

        return _workload;
    }

    /// <summary>
    /// Date-to-cache-file map produced by <see cref="GetDataLoadWorkload"/>; null until that method has run.
    /// Values may be null where no file was found for a date.
    /// </summary>
    private Dictionary<DateTime, FileInfo> _workload;
103

104
    /// <summary>
    /// Converts each file's full path into the remainder left after removing <paramref name="directory"/>'s full
    /// path from the front. Any separator that followed the directory is kept at the start of the result.
    /// </summary>
    /// <param name="absoluteFilePaths">Files expected to live under <paramref name="directory"/>.</param>
    /// <param name="directory">The directory the returned paths are relative to.</param>
    /// <returns>One relative path per input file, in input order.</returns>
    /// <exception cref="InvalidOperationException">A file's full path does not start with the directory's full path.</exception>
    private static string[] GetPathsRelativeToDirectory(FileInfo[] absoluteFilePaths, DirectoryInfo directory)
    {
        var root = directory.FullName;
        var relativeFilePaths = new List<string>(absoluteFilePaths.Length);

        foreach (var file in absoluteFilePaths)
        {
            var fullName = file.FullName;

            // Ordinal comparison so the matched prefix is exactly root.Length characters long
            if (!fullName.StartsWith(root, StringComparison.Ordinal))
                throw new InvalidOperationException(
                    $"The file must be within {directory.FullName} (or a subdirectory thereof)");

            // Strip only the leading root; a Replace would also delete later repeats of the same text
            relativeFilePaths.Add(fullName.Substring(root.Length));
        }

        return relativeFilePaths.ToArray();
    }
118

119
    /// <summary>
    /// Compares the files currently under ForLoading (searched recursively) with the files recorded in
    /// <see cref="_workload"/>, both expressed relative to their own root folder.
    /// </summary>
    /// <param name="directory">Load directory supplying the ForLoading and Cache folders.</param>
    /// <returns>True when both sets of relative paths are identical.</returns>
    private bool FilesInForLoadingMatchWorkload(ILoadDirectory directory)
    {
        var filesInForLoading = GetPathsRelativeToDirectory(
            directory.ForLoading.EnumerateFiles("*", SearchOption.AllDirectories).ToArray(), directory.ForLoading);

        // GetDataLoadWorkload records a null value when no cache file was found for a date and ExtractJobs skips
        // those entries, so they are never expected in ForLoading (and would otherwise cause a null dereference).
        var filesFromCache = GetPathsRelativeToDirectory(
            _workload.Values.Where(file => file != null).ToArray(), directory.Cache);

        // Ordinal ordering keeps the sort independent of the current culture
        return filesInForLoading.OrderBy(path => path, StringComparer.Ordinal)
            .SequenceEqual(filesFromCache.OrderBy(path => path, StringComparer.Ordinal));
    }
127

128
    /// <summary>
    /// Places the files recorded in <see cref="_workload"/> into the job's ForLoading folder, either by extracting
    /// them (when <see cref="ExtractFilesFromArchive"/> is true) or by copying them as they are.
    /// </summary>
    /// <remarks>
    /// If ForLoading already contains anything, nothing is extracted: the existing files must match the workload,
    /// in which case a warning is raised and the method returns.
    /// </remarks>
    /// <param name="dataLoadJob">The running job; must be a <see cref="ScheduledDataLoadJob"/>.</param>
    /// <exception cref="InvalidOperationException">
    /// The workload has not been built yet, or ForLoading holds files that do not match it.
    /// </exception>
    protected void ExtractJobs(IDataLoadJob dataLoadJob)
    {
        // Both paths below read the workload, so fail clearly up front rather than with a null dereference later
        if (_workload == null)
            throw new InvalidOperationException(
                "The workload has not been initialised, don't know what files are to be retrieved from the cache");

        var loadDirectory = dataLoadJob.LoadDirectory;

        // check to see if forLoading has anything in it and bail if it does
        if (loadDirectory.ForLoading.EnumerateFileSystemInfos().Any())
        {
            // RDMPDEV-185
            // There are files in ForLoading, but do they match what we would expect to find? Need to make sure that they aren't from a different dataset and/or there is the expected number of files
            if (!FilesInForLoadingMatchWorkload(loadDirectory))
                throw new InvalidOperationException(
                    "The files in ForLoading do not match what this job expects to be loading from the cache. Please delete the files in ForLoading before re-attempting the data load.");

            dataLoadJob.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Warning, "ForLoading already has files, skipping extraction"));
            return;
        }

        // Use the shared conversion helper so a wrong job type produces its descriptive error
        var layout = CreateCacheLayout(ConvertToScheduledJob(dataLoadJob));

        //extract all the jobs into the forLoading directory
        foreach (var job in _workload)
        {
            // no cache file was found for this date
            if (job.Value == null)
                continue;

            if (ExtractFilesFromArchive)
            {
                var extractor = CreateExtractor(layout.ArchiveType);
                extractor.Extract(job, loadDirectory.ForLoading, dataLoadJob);
                continue;
            }

            dataLoadJob.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
                $"Archive identified:{job.Value.FullName}"));

            // just copy the archives across, keeping the same sub folder structure as the cache
            var relativePath = GetPathRelativeToCacheRoot(loadDirectory.Cache, job.Value);
            var absolutePath = Path.Combine(loadDirectory.ForLoading.FullName, relativePath);

            // CreateDirectory does nothing when the directory already exists
            Directory.CreateDirectory(absolutePath);

            job.Value.CopyTo(Path.Combine(absolutePath, job.Value.Name));
        }
    }
4✔
178

179
    /// <summary>
    /// Returns the folder containing <paramref name="fileInCache"/> expressed relative to
    /// <paramref name="cacheRoot"/>, with no leading directory separator.
    /// </summary>
    /// <param name="cacheRoot">Root folder of the cache.</param>
    /// <param name="fileInCache">A file located in the cache.</param>
    /// <returns>
    /// The relative folder path; an empty string when the file sits directly in <paramref name="cacheRoot"/>.
    /// </returns>
    private static string GetPathRelativeToCacheRoot(DirectoryInfo cacheRoot, FileInfo fileInCache)
    {
        var root = cacheRoot.FullName;
        var containingDirectory = fileInCache.Directory.FullName;

        // Remove the root only from the front; Replace would also strip any later repeat of the same text
        var remainder = containingDirectory.StartsWith(root, StringComparison.Ordinal)
            ? containingDirectory.Substring(root.Length)
            : containingDirectory;

        return remainder.TrimStart(Path.DirectorySeparatorChar);
    }
4✔
181

182
    /// <summary>
    /// Chooses the extractor for the given archive type.
    /// </summary>
    /// <param name="cacheArchiveType">Archive type reported by the cache layout.</param>
    /// <returns>A <see cref="ZipExtractor"/> for <see cref="CacheArchiveType.Zip"/>.</returns>
    /// <exception cref="Exception">The archive type is <see cref="CacheArchiveType.None"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The archive type is any other value.</exception>
    private static IArchivedFileExtractor CreateExtractor(CacheArchiveType cacheArchiveType)
    {
        switch (cacheArchiveType)
        {
            case CacheArchiveType.None:
                throw new Exception("At this stage a cache archive type must be specified");

            case CacheArchiveType.Zip:
                return new ZipExtractor();

            default:
                throw new ArgumentOutOfRangeException(nameof(cacheArchiveType));
        }
    }
191

192
    /// <summary>
    /// Confirms that <paramref name="destination"/> has a Cache folder and that the folder exists.
    /// </summary>
    /// <param name="destination">The load directory to inspect.</param>
    /// <returns>Always true; every failure is reported by throwing.</returns>
    /// <exception cref="NullReferenceException">The load directory has no Cache folder.</exception>
    /// <exception cref="DirectoryNotFoundException">The Cache folder does not exist.</exception>
    public static bool Validate(ILoadDirectory destination)
    {
        if (destination.Cache == null)
        {
            throw new NullReferenceException(
                $"Destination {destination.RootPath.FullName} does not have a 'Cache' folder");
        }

        if (!destination.Cache.Exists)
        {
            throw new DirectoryNotFoundException(destination.Cache.FullName);
        }

        return true;
    }
200

201

202
    /// <summary>
    /// Post-load hook. The body is empty in this class, so neither argument is used.
    /// </summary>
    /// <param name="exitCode">Outcome of the load.</param>
    /// <param name="postLoadEventListener">Listener supplied by the caller.</param>
    public void LoadCompletedSoDispose(ExitCodeType exitCode, IDataLoadEventListener postLoadEventListener)
    {
        // no work is performed here
    }
2✔
205

206
    /// <summary>
    /// Reports on the cache configuration through <paramref name="notifier"/>: that a LoadProgress with a
    /// CacheProgress is set, that a cache layout can be created, the layout's settings, whether files were found
    /// in the cache, and the cache directory.
    /// </summary>
    /// <remarks>
    /// Stops at the first failed prerequisite. Any exception thrown along the way is reported as a single failed
    /// check instead of propagating.
    /// </remarks>
    /// <param name="notifier">Receives one event per check performed.</param>
    public void Check(ICheckNotifier notifier)
    {
        try
        {
            if (LoadProgress == null)
            {
                var noLoadProgress = new CheckEventArgs("A LoadProgress must be selected for a Cache to run",
                    CheckResult.Fail);
                notifier.OnCheckPerformed(noLoadProgress);
                return;
            }

            var cacheProgress = LoadProgress.CacheProgress;
            if (cacheProgress == null)
            {
                var noCacheProgress = new CheckEventArgs(
                    "LoadProgress must have a CacheProgress associated with it to support CachedFileRetrieval",
                    CheckResult.Fail);
                notifier.OnCheckPerformed(noCacheProgress);
                return;
            }

            var cacheLayout =
                CreateCacheLayout(cacheProgress, new FromCheckNotifierToDataLoadEventListener(notifier));
            if (cacheLayout == null)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("CacheLayout created was null!", CheckResult.Fail));
                return;
            }

            // describe the layout that was created
            notifier.OnCheckPerformed(
                new CheckEventArgs($"Archive type is:{cacheLayout.ArchiveType}", CheckResult.Success));
            notifier.OnCheckPerformed(
                new CheckEventArgs($"DateFormat is:{cacheLayout.DateFormat}", CheckResult.Success));
            notifier.OnCheckPerformed(
                new CheckEventArgs($"Granularity is:{cacheLayout.CacheFileGranularity}", CheckResult.Success));
            notifier.OnCheckPerformed(
                new CheckEventArgs($"CacheLayout is:{cacheLayout}", CheckResult.Success));

            // an empty cache is only a warning
            var anyFilesInCache =
                cacheLayout.CheckCacheFilesAvailability(new FromCheckNotifierToDataLoadEventListener(notifier));
            var availabilityResult = anyFilesInCache ? CheckResult.Success : CheckResult.Warning;
            notifier.OnCheckPerformed(
                new CheckEventArgs($"Files Found In Cache:{anyFilesInCache}", availabilityResult));

            var cacheDirectory =
                cacheLayout.GetLoadCacheDirectory(new FromCheckNotifierToDataLoadEventListener(notifier));
            if (cacheDirectory == null)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Cache Directory was null!", CheckResult.Fail));
                return;
            }

            notifier.OnCheckPerformed(
                new CheckEventArgs($"Cache Directory Is:{cacheDirectory.FullName}", CheckResult.Success));
        }
        catch (Exception ex)
        {
            notifier.OnCheckPerformed(new CheckEventArgs($"Checking failed on {this}", CheckResult.Fail, ex));
        }
    }
×
263
}
264

265
/// <summary>
/// Signature for handlers of <see cref="CachedFileRetriever.CacheFileNotFound"/>.
/// </summary>
/// <param name="sender">The object raising the event.</param>
/// <param name="message">Description of the file that could not be found.</param>
/// <param name="ex">Associated exception; may be null.</param>
public delegate void CacheFileNotFoundHandler(object sender, string message, Exception ex);
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc