HicServices / RDMP / 6237307473

19 Sep 2023 04:02PM UTC coverage: 57.015% (-0.4%) from 57.44%

Build 6237307473 (push, via github / web-flow)

Feature/rc4 (#1570)

* Syntax tidying
* Dependency updates
* Event handling singletons (ThrowImmediately and co)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: James A Sutherland <>
Co-authored-by: James Friel <jfriel001@dundee.ac.uk>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

10734 of 20259 branches covered (52.98%)

Branch coverage included in aggregate %.

5922 of 5922 new or added lines in 565 files covered. (100.0%)

30687 of 52390 relevant lines covered (58.57%)

7361.8 hits per line

Source File (64.63% covered):
/Rdmp.Core/DataExport/DataExtraction/Pipeline/Sources/ExecuteDatasetExtractionSource.cs
// Copyright (c) The University of Dundee 2018-2019
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.

using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using FAnsi;
using FAnsi.Discovery.QuerySyntax;
using Rdmp.Core.Curation.Data;
using Rdmp.Core.DataExport.Data;
using Rdmp.Core.DataExport.DataExtraction.Commands;
using Rdmp.Core.DataFlowPipeline;
using Rdmp.Core.DataFlowPipeline.Requirements;
using Rdmp.Core.DataLoad.Engine.Pipeline.Components;
using Rdmp.Core.DataLoad.Engine.Pipeline.Sources;
using Rdmp.Core.QueryBuilding;
using Rdmp.Core.ReusableLibraryCode;
using Rdmp.Core.ReusableLibraryCode.Checks;
using Rdmp.Core.ReusableLibraryCode.DataAccess;
using Rdmp.Core.ReusableLibraryCode.Progress;
using IContainer = Rdmp.Core.Curation.Data.IContainer;

namespace Rdmp.Core.DataExport.DataExtraction.Pipeline.Sources;

/// <summary>
/// Executes a single dataset extraction by linking a cohort with a dataset (either core or custom data - see IExtractCommand).  Also calculates the number
/// of unique release identifiers seen, records row validation failures, etc.
/// </summary>
public class ExecuteDatasetExtractionSource : IPluginDataFlowSource<DataTable>, IPipelineRequirement<IExtractCommand>
{
    //The request is for exactly one of these
    public ExtractDatasetCommand Request { get; protected set; }
    public ExtractGlobalsCommand GlobalsRequest { get; protected set; }

    public const string AuditTaskName = "DataExtraction";

    private readonly List<string> _extractionIdentifiersidx = new();

    private bool _cancel;

    private ICatalogue _catalogue;

    protected const string ValidationColumnName = "RowValidationResult";

    public ExtractionTimeValidator ExtractionTimeValidator { get; protected set; }
    public Exception ValidationFailureException { get; protected set; }

    public HashSet<object> UniqueReleaseIdentifiersEncountered { get; set; }

    public ExtractionTimeTimeCoverageAggregator ExtractionTimeTimeCoverageAggregator { get; set; }

    [DemandsInitialization(
        "Determines the system's behaviour when an extraction query returns 0 rows.  Default (false) is that an error is reported.  If set to true (ticked) then instead a DataTable with 0 rows but all the correct headers will be generated, usually resulting in a headers-only 0 line/empty extract file")]
    public bool AllowEmptyExtractions { get; set; }

    [DemandsInitialization(
        "Batch size, the number of records to read from the source before releasing them into the extraction pipeline",
        DefaultValue = 10000, Mandatory = true)]
    public int BatchSize { get; set; }

    [DemandsInitialization(
        "In seconds. Overrides the global timeout for SQL query execution. Use 0 for infinite timeout.",
        DefaultValue = 50000, Mandatory = true)]
    public int ExecutionTimeout { get; set; }

    [DemandsInitialization(@"Determines how the system achieves DISTINCT on extraction.  The options are:
None - Do not DISTINCT the records, can result in duplication in your extract (not recommended)
SqlDistinct - Adds the DISTINCT keyword to the SELECT sql sent to the server
OrderByAndDistinctInMemory - Adds an ORDER BY statement to the query and applies the DISTINCT in memory as records are read from the server (this can help when extracting very large data sets where the DISTINCT keyword blocks record streaming until all records are ready to go)"
        , DefaultValue = DistinctStrategy.SqlDistinct)]
    public DistinctStrategy DistinctStrategy { get; set; }
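
    // Illustrative effect of each strategy on the generated query (hypothetical column names, for orientation only):
    //   None                        SELECT chi, dob FROM ...           (duplicates may appear in the extract)
    //   SqlDistinct                 SELECT DISTINCT chi, dob FROM ...
    //   OrderByAndDistinctInMemory  SELECT chi, dob FROM ... ORDER BY chi
    //                               (duplicates are then removed per batch, see GetChunk/MakeDistinct below)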

    [DemandsInitialization("When DBMS is SqlServer then HASH JOIN should be used instead of regular JOINs")]
    public bool UseHashJoins { get; set; }

    [DemandsInitialization(
        "When DBMS is SqlServer and the extraction is for any of these datasets then HASH JOIN should be used instead of regular JOINs")]
    public Catalogue[] UseHashJoinsForCatalogues { get; set; }

    [DemandsInitialization(
        "Exclusion list.  A collection of Catalogues which will never be considered for HASH JOIN even when UseHashJoins is enabled.  Being on this list takes precedence for a Catalogue even if it is also on UseHashJoinsForCatalogues.")]
    public Catalogue[] DoNotUseHashJoinsForCatalogues { get; set; }

    /// <summary>
    /// A dictionary of all the CatalogueItems used in the query, recording the underlying datatype in the origin database and the
    /// actual datatype that was output after the transform operation (e.g. a varchar(10) could be converted into a bona fide DateTime,
    /// which would be an sql Date).  Finally, a recommended SqlDbType is passed back.
    /// </summary>
    public Dictionary<ExtractableColumn, ExtractTimeTransformationObserved> ExtractTimeTransformationsObserved;

    private DbDataCommandDataFlowSource _hostedSource;
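
    // Typical lifecycle (see PreInitialize and GetChunk below): the pipeline calls PreInitialize, which routes
    // the IExtractCommand to Initialize(ExtractDatasetCommand) or Initialize(ExtractGlobalsCommand); GetChunk is
    // then called repeatedly, returning one batch per call until it returns null to signal the data is exhausted.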

    protected virtual void Initialize(ExtractDatasetCommand request)
    {
        Request = request;

        if (request == ExtractDatasetCommand.EmptyCommand)
            return;

        _timeSpentValidating = new Stopwatch();
        _timeSpentCalculatingDISTINCT = new Stopwatch();
        _timeSpentBuckettingDates = new Stopwatch();

        Request.ColumnsToExtract.Sort(); //ensure they are in the right order so we can record the release identifiers

        //if we do not already have a cached builder, generate one
        if (request.QueryBuilder == null)
            request.GenerateQueryBuilder();

        foreach (var substitution in Request.ReleaseIdentifierSubstitutions)
            _extractionIdentifiersidx.Add(substitution.GetRuntimeName());

        UniqueReleaseIdentifiersEncountered = new HashSet<object>();

        _catalogue = request.Catalogue;

        if (!string.IsNullOrWhiteSpace(_catalogue.ValidatorXML))
            ExtractionTimeValidator = new ExtractionTimeValidator(_catalogue, request.ColumnsToExtract);

        //if there is a time periodicity ExtractionInformation (AND it is among the columns the user selected to be extracted)
        if (_catalogue.TimeCoverage_ExtractionInformation_ID != null && request.ColumnsToExtract
                .Cast<ExtractableColumn>().Any(c =>
                    c.CatalogueExtractionInformation_ID == _catalogue.TimeCoverage_ExtractionInformation_ID))
            ExtractionTimeTimeCoverageAggregator =
                new ExtractionTimeTimeCoverageAggregator(_catalogue, request.ExtractableCohort);
        else
            ExtractionTimeTimeCoverageAggregator = null;
    }

    private void Initialize(ExtractGlobalsCommand request)
    {
        GlobalsRequest = request;
    }

    public bool WasCancelled => _cancel;

    private Stopwatch _timeSpentValidating;
    private int _rowsValidated;

    private Stopwatch _timeSpentCalculatingDISTINCT;
    private Stopwatch _timeSpentBuckettingDates;
    private int _rowsBucketted;

    private bool firstChunk = true;
    private bool firstGlobalChunk = true;
    private int _rowsRead;

    private RowPeeker _peeker = new();

    public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
    {
        // we are in the Globals Command case, let's return an empty DataTable (not null)
        // so we can trigger the destination to extract the globals docs and sql
        if (GlobalsRequest != null)
        {
            GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);
            if (firstGlobalChunk)
            {
                //unless we are checking, start auditing
                StartAuditGlobals();

                firstGlobalChunk = false;
                return new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME);
            }

            return null;
        }

        if (Request == null)
            throw new Exception("Component has not been initialized before being asked to GetChunk(s)");

        Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

        if (_cancel)
            throw new Exception("User cancelled data extraction");

        if (_hostedSource == null)
        {
            StartAudit(Request.QueryBuilder.SQL);

            if (Request.DatasetBundle.DataSet.DisableExtraction)
                throw new Exception(
                    $"Cannot extract {Request.DatasetBundle.DataSet} because DisableExtraction is set to true");

            _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
                $"ExecuteDatasetExtraction {Request.DatasetBundle.DataSet}",
                Request.GetDistinctLiveDatabaseServer().Builder,
                ExecutionTimeout)
            {
                // If we are running in batches then always allow empty extractions
                AllowEmptyResultSets = AllowEmptyExtractions || Request.IsBatchResume,
                BatchSize = BatchSize
            };
        }

        DataTable chunk = null;

        try
        {
            chunk = _hostedSource.GetChunk(listener, cancellationToken);

            chunk = _peeker.AddPeekedRowsIfAny(chunk);

            //if we are trying to distinct the records in memory based on release id
            if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
            {
                var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

                if (chunk is { Rows.Count: > 0 })
                {
                    //last release id in the current chunk
                    var lastReleaseId = chunk.Rows[^1][releaseIdentifierColumn];

                    //keep reading while the release id matches, so all rows for one subject stay in the same
                    //(ordered) batch and the per-batch DISTINCT below cannot miss duplicates split across batches
                    _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                    chunk = MakeDistinct(chunk, listener, cancellationToken);
                }
            }
        }
        catch (AggregateException a)
        {
            if (a.GetExceptionIfExists<TaskCanceledException>() != null)
                _cancel = true;

            throw;
        }
        catch (Exception e)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
        }

        if (cancellationToken.IsCancellationRequested)
            throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");

        //if the first chunk is null
        if (firstChunk && chunk == null && !AllowEmptyExtractions)
            throw new Exception(
                $"There is no data to load, query returned no rows, query was:{Environment.NewLine}{_hostedSource.Sql ?? Request.QueryBuilder.SQL}");

        //not the first chunk anymore
        firstChunk = false;

        //data exhausted
        if (chunk == null)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
                $"Data exhausted after reading {_rowsRead} rows of data ({UniqueReleaseIdentifiersEncountered.Count} unique release identifiers seen)"));
            if (Request != null)
                Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered =
                    Request.IsBatchResume ? -1 : UniqueReleaseIdentifiersEncountered.Count;
            return null;
        }

        _rowsRead += chunk.Rows.Count;

        //the chunk will have datatypes for everything in the buffer so we can populate our dictionary of facts about which columns/catalogue items have spontaneously changed name/type etc
        //(first chunk only - looks at the buffer and computes any transforms performed on the columns)
        if (ExtractTimeTransformationsObserved == null)
            GenerateExtractionTransformObservations(chunk);

        //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
        var includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;

        _timeSpentValidating.Start();
        //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data, just some metadata that goes into a flat file
        if (ExtractionTimeValidator != null && Request.IncludeValidation)
            try
            {
                chunk.Columns.Add(ValidationColumnName);

                ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

                _rowsValidated += chunk.Rows.Count;
                listener.OnProgress(this,
                    new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records),
                        _timeSpentValidating.Elapsed));
            }
            catch (Exception ex)
            {
                listener.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
                ValidationFailureException = ex;
                ExtractionTimeValidator = null;
            }

        _timeSpentValidating.Stop();

        _timeSpentBuckettingDates.Start();
        if (ExtractionTimeTimeCoverageAggregator != null)
        {
            _rowsBucketted += chunk.Rows.Count;

            foreach (DataRow row in chunk.Rows)
                ExtractionTimeTimeCoverageAggregator.ProcessRow(row);

            listener.OnProgress(this,
                new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records),
                    _timeSpentBuckettingDates.Elapsed));
        }

        _timeSpentBuckettingDates.Stop();

        _timeSpentCalculatingDISTINCT.Start();
        //record unique release identifiers found
        if (includesReleaseIdentifier)
            foreach (var idx in _extractionIdentifiersidx)
            {
                foreach (DataRow r in chunk.Rows)
                {
                    if (r[idx] == DBNull.Value)
                        if (_extractionIdentifiersidx.Count == 1)
                            throw new Exception(
                                $"Null release identifier found in extract of dataset {Request.DatasetBundle.DataSet}");
                        else
                            continue; //there are multiple extraction identifiers, so it's fine if one or two are null

                    UniqueReleaseIdentifiersEncountered.Add(r[idx]);
                }

                listener.OnProgress(this,
                    new ProgressEventArgs("Calculating Distinct Release Identifiers",
                        new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records),
                        _timeSpentCalculatingDISTINCT.Elapsed));
            }

        _timeSpentCalculatingDISTINCT.Stop();

        return chunk;
    }

    /// <summary>
    /// Makes the current batch ONLY distinct.  This only works if you have a bounded batch (see OrderByAndDistinctInMemory)
    /// </summary>
    /// <param name="chunk"></param>
    /// <param name="listener"></param>
    /// <param name="cancellationToken"></param>
    /// <returns></returns>
    private static DataTable MakeDistinct(DataTable chunk, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        var removeDuplicates = new RemoveDuplicates { NoLogging = true };
        return removeDuplicates.ProcessPipelineData(chunk, listener, cancellationToken);
    }
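
    // RemoveDuplicates (from Rdmp.Core.DataLoad.Engine.Pipeline.Components) strips rows that duplicate earlier ones;
    // because it only ever sees one bounded batch here, distinctness holds within a batch, not across the whole extract
    // (which is why GetChunk keeps peeking rows until the release identifier changes before calling this).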

    private void GenerateExtractionTransformObservations(DataTable chunk)
    {
        ExtractTimeTransformationsObserved = new Dictionary<ExtractableColumn, ExtractTimeTransformationObserved>();

        //create the Types dictionary
        foreach (ExtractableColumn column in Request.ColumnsToExtract)
        {
            ExtractTimeTransformationsObserved.Add(column, new ExtractTimeTransformationObserved());

            //record catalogue information about what it is supposed to be
            if (!column.HasOriginalExtractionInformationVanished())
            {
                var extractionInformation = column.CatalogueExtractionInformation;

                //what the catalogue says it is
                ExtractTimeTransformationsObserved[column].DataTypeInCatalogue =
                    extractionInformation.ColumnInfo.Data_type;
                ExtractTimeTransformationsObserved[column].CatalogueItem = extractionInformation.CatalogueItem;

                //what it actually is
                if (chunk.Columns.Contains(column.GetRuntimeName()))
                {
                    ExtractTimeTransformationsObserved[column].FoundAtExtractTime = true;
                    ExtractTimeTransformationsObserved[column].DataTypeObservedInRuntimeBuffer =
                        chunk.Columns[column.GetRuntimeName()].DataType;
                }
                else
                {
                    ExtractTimeTransformationsObserved[column].FoundAtExtractTime = false;
                }
            }
        }
    }

    private string GetCommandSQL(IDataLoadEventListener listener)
    {
        //if the user wants some custom logic for removing identical duplicates
        switch (DistinctStrategy)
        {
            //user doesn't care about identical duplicates
            case DistinctStrategy.None:
                ((QueryBuilder)Request.QueryBuilder).SetLimitationSQL("");
                break;

            //system default behaviour
            case DistinctStrategy.SqlDistinct:
                break;

            //user wants to order by the release ID and resolve duplicates in batches as they are read
            case DistinctStrategy.OrderByAndDistinctInMemory:

                //remove the DISTINCT keyword from the query
                ((QueryBuilder)Request.QueryBuilder).SetLimitationSQL("");

                //find the release identifier substitution (e.g. chi for PROCHI)
                var substitution = Request.ReleaseIdentifierSubstitutions.First();

                //add a line at the end of the query to ORDER BY the ReleaseId column (e.g. PROCHI)
                var orderBySql = $"ORDER BY {substitution.SelectSQL}";

                //don't add the line if it is already there (e.g. because of Retry)
                if (!Request.QueryBuilder.CustomLines.Any(l => string.Equals(l.Text, orderBySql)))
                    Request.QueryBuilder.AddCustomLine(orderBySql, QueryComponent.Postfix);

                break;
            default:
                throw new ArgumentOutOfRangeException();
        }

        var sql = Request.QueryBuilder.SQL;

        sql = HackExtractionSQL(sql, listener);

        if (ShouldUseHashedJoins())
        {
            //use hash joins! (a plain string substitution over the whole query)
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Information, "Substituting JOIN for HASH JOIN"));
            sql = sql.Replace(" JOIN ", " HASH JOIN ");
        }

        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            $"/*Decided on extraction SQL:*/{Environment.NewLine}{sql}"));

        return sql;
    }
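
    // Example of the substitution above on a hypothetical generated query:
    //   SELECT ... FROM [Biochemistry] b LEFT JOIN [Cohort] c ON b.chi = c.chi
    // becomes
    //   SELECT ... FROM [Biochemistry] b LEFT HASH JOIN [Cohort] c ON b.chi = c.chi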

    private bool ShouldUseHashedJoins()
    {
        var dbms = Request?.QueryBuilder?.QuerySyntaxHelper?.DatabaseType;

        //must be sql server
        if (dbms == null || dbms.Value != DatabaseType.MicrosoftSQLServer)
            return false;

        //is this Catalogue explicitly marked as never hash join? i.e. it is on the exclusion list
        if (DoNotUseHashJoinsForCatalogues?.Contains(Request.Catalogue) ?? false)
            return false;

        if (UseHashJoins)
            return true;

        if (UseHashJoinsForCatalogues != null)
            return UseHashJoinsForCatalogues.Contains(Request.Catalogue);

        //user doesn't want to use hash joins
        return false;
    }

    public virtual string HackExtractionSQL(string sql, IDataLoadEventListener listener) => sql;

    private void StartAudit(string sql)
    {
        var dataExportRepo = Request.DataExportRepository;

        var previousAudit = dataExportRepo
            .GetAllCumulativeExtractionResultsFor(Request.Configuration, Request.DatasetBundle.DataSet).ToArray();

        if (Request.IsBatchResume)
        {
            var match =
                previousAudit.FirstOrDefault(a => a.ExtractableDataSet_ID == Request.DatasetBundle.DataSet.ID) ??
                throw new Exception(
                    $"Could not find previous CumulativeExtractionResults for dataset {Request.DatasetBundle.DataSet} despite the Request being marked as a batch resume");
            Request.CumulativeExtractionResults = match;
        }
        else
        {
            //delete old audit records
            foreach (var audit in previousAudit)
                audit.DeleteInDatabase();

            var extractionResults = new CumulativeExtractionResults(dataExportRepo, Request.Configuration,
                Request.DatasetBundle.DataSet, sql);

            var filterDescriptions =
                RecursivelyListAllFilterNames(
                    Request.Configuration.GetFilterContainerFor(Request.DatasetBundle.DataSet));

            extractionResults.FiltersUsed = filterDescriptions.TrimEnd(',');
            extractionResults.SaveToDatabase();

            Request.CumulativeExtractionResults = extractionResults;
        }
    }

    private void StartAuditGlobals()
    {
        var repo = GlobalsRequest.RepositoryLocator.DataExportRepository;

        var previousAudit = repo
            .GetAllObjectsWhere<SupplementalExtractionResults>("ExtractionConfiguration_ID",
                GlobalsRequest.Configuration.ID)
            .Where(c => c.CumulativeExtractionResults_ID == null);

        //delete old audit records
        foreach (var audit in previousAudit)
            audit.DeleteInDatabase();
    }

    private string RecursivelyListAllFilterNames(IContainer filterContainer)
    {
        if (filterContainer == null)
            return "";

        var toReturn = "";

        if (filterContainer.GetSubContainers() != null)
            foreach (var subContainer in filterContainer.GetSubContainers())
                toReturn += RecursivelyListAllFilterNames(subContainer);

        if (filterContainer.GetFilters() != null)
            foreach (var f in filterContainer.GetFilters())
                toReturn += $"{f.Name},";

        return toReturn;
    }
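
    // e.g. for a root container holding filter 'AliveOnly' and a subcontainer holding 'TaysideOnly', this returns
    // "TaysideOnly,AliveOnly," (hypothetical filter names; subcontainers are walked first and the trailing comma
    // is trimmed by StartAudit).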

    public virtual void Dispose(IDataLoadEventListener job, Exception pipelineFailureExceptionIfAny)
    {
    }

    public void Abort(IDataLoadEventListener listener)
    {
    }

    public virtual DataTable TryGetPreview()
    {
        if (Request == ExtractDatasetCommand.EmptyCommand)
            return new DataTable();

        var toReturn = new DataTable();
        toReturn.BeginLoadData();
        var server = _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, false);

        using var con = server.GetConnection();
        con.Open();

        var da = server.GetDataAdapter(Request.QueryBuilder.SQL, con);

        //get up to 1000 records
        da.Fill(0, 1000, toReturn);
        toReturn.EndLoadData();

        con.Close();

        return toReturn;
    }

    public void PreInitialize(IExtractCommand value, IDataLoadEventListener listener)
    {
        if (value is ExtractDatasetCommand datasetCommand)
            Initialize(datasetCommand);
        if (value is ExtractGlobalsCommand command)
            Initialize(command);
    }

    public virtual void Check(ICheckNotifier notifier)
    {
        if (Request == ExtractDatasetCommand.EmptyCommand)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "Request is ExtractDatasetCommand.EmptyCommand, checking will not be carried out",
                CheckResult.Warning));
            return;
        }

        if (GlobalsRequest != null)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "Request is for Globals, checking will not be carried out at source", CheckResult.Success));
            return;
        }

        if (Request == null)
        {
            notifier.OnCheckPerformed(new CheckEventArgs("ExtractionRequest has not been set", CheckResult.Fail));
            return;
        }
    }
}