HicServices / RDMP / 7194961165

13 Dec 2023 12:07PM UTC coverage: 56.776% (-0.2%) from 57.013%
push · github · web-flow

Merge Latest Release into main (#1702)

* Bump YamlDotNet from 13.3.1 to 13.4.0

Bumps [YamlDotNet](https://github.com/aaubry/YamlDotNet) from 13.3.1 to 13.4.0.
- [Release notes](https://github.com/aaubry/YamlDotNet/releases)
- [Commits](https://github.com/aaubry/YamlDotNet/compare/v13.3.1...v13.4.0)

---
updated-dependencies:
- dependency-name: YamlDotNet
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump shogo82148/actions-setup-perl from 1.23.1 to 1.24.1

Bumps [shogo82148/actions-setup-perl](https://github.com/shogo82148/actions-setup-perl) from 1.23.1 to 1.24.1.
- [Release notes](https://github.com/shogo82148/actions-setup-perl/releases)
- [Commits](https://github.com/shogo82148/actions-setup-perl/compare/v1.23.1...v1.24.1)

---
updated-dependencies:
- dependency-name: shogo82148/actions-setup-perl
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* fix checkbox issue

* improve confirmation text (#1639)

* improve confirmation text
* Loop tidyup, use var where possible

---------

Co-authored-by: jas88 <j.a.sutherland@dundee.ac.uk>

* correct typo in create logging sql (#1640)

* Feature/ci codescan (#1641)

* Move SecurityCodescan.VS2019 to run on Github CI alone, integrate results with CodeQL
* Remove SecurityCodescan from Packages.md, no longer used via Nuget

---------

Co-authored-by: James A Sutherland <j@sutherland.pw>

* hide source control when not available

* Remove old Plugin object bits, tidy up (#1636)

* Remove old Plugin object bits, tidy up

* Purge remaining bits of AllExpiredPluginsNode

* Fix plugin display name in tree

* Update CreateNewDataExtractionProjectUI.cs

Casting fix

* Feature/rdmp42 delete plugins (#1642)

* add ui plugin delete functionality

* Warning and inherita... (continued)

10722 of 20351 branches covered (52.69%)

Branch coverage included in aggregate %.

215 of 789 new or added lines in 63 files covered (27.25%).

39 existing lines in 16 files now uncovered.

30650 of 52518 relevant lines covered (58.36%)

7294.17 hits per line
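
Because branch coverage is included in the aggregate percentage, the headline figure can be cross-checked from the tallies above. A minimal sketch, assuming the aggregate is simply (covered lines + covered branches) over (relevant lines + total branches) — an assumption, though it reproduces the reported 56.776% exactly (this helper is hypothetical, not part of RDMP):

// Cross-check of the coverage figures reported above (hypothetical helper, not part of RDMP).
using System;

static class CoverageArithmetic
{
    static void Main()
    {
        const double coveredLines = 30650, relevantLines = 52518;    // "30650 of 52518 relevant lines covered"
        const double coveredBranches = 10722, totalBranches = 20351; // "10722 of 20351 branches covered"

        Console.WriteLine($"line coverage:   {100 * coveredLines / relevantLines:F2}%");    // 58.36%
        Console.WriteLine($"branch coverage: {100 * coveredBranches / totalBranches:F2}%"); // 52.69%

        // Aggregate folds branches into the denominator alongside relevant lines.
        Console.WriteLine($"aggregate:       {100 * (coveredLines + coveredBranches) / (relevantLines + totalBranches):F3}%"); // 56.776%
    }
}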

Source File (65.5% covered)
/Rdmp.Core/DataExport/DataExtraction/Pipeline/Sources/ExecuteDatasetExtractionSource.cs
// Copyright (c) The University of Dundee 2018-2019
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.

using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using FAnsi;
using FAnsi.Discovery.QuerySyntax;
using Rdmp.Core.Curation.Data;
using Rdmp.Core.DataExport.Data;
using Rdmp.Core.DataExport.DataExtraction.Commands;
using Rdmp.Core.DataFlowPipeline;
using Rdmp.Core.DataFlowPipeline.Requirements;
using Rdmp.Core.DataLoad.Engine.Pipeline.Components;
using Rdmp.Core.DataLoad.Engine.Pipeline.Sources;
using Rdmp.Core.QueryBuilding;
using Rdmp.Core.ReusableLibraryCode;
using Rdmp.Core.ReusableLibraryCode.Checks;
using Rdmp.Core.ReusableLibraryCode.DataAccess;
using Rdmp.Core.ReusableLibraryCode.Progress;
using IContainer = Rdmp.Core.Curation.Data.IContainer;

namespace Rdmp.Core.DataExport.DataExtraction.Pipeline.Sources;

/// <summary>
/// Executes a single Dataset extraction by linking a cohort with a dataset (either core or custom data - See IExtractCommand).  Also calculates the number
/// of unique identifiers seen, records row validation failures etc.
/// </summary>
public class ExecuteDatasetExtractionSource : IPluginDataFlowSource<DataTable>, IPipelineRequirement<IExtractCommand>
{
    //Request is for one or the other of these
    public ExtractDatasetCommand Request { get; protected set; }
    public ExtractGlobalsCommand GlobalsRequest { get; protected set; }

    public const string AuditTaskName = "DataExtraction";

    private readonly List<string> _extractionIdentifiersidx = new();

    private bool _cancel;

    private ICatalogue _catalogue;

    protected const string ValidationColumnName = "RowValidationResult";

    public ExtractionTimeValidator ExtractionTimeValidator { get; protected set; }
    public Exception ValidationFailureException { get; protected set; }

    public HashSet<object> UniqueReleaseIdentifiersEncountered { get; set; }

    public ExtractionTimeTimeCoverageAggregator ExtractionTimeTimeCoverageAggregator { get; set; }

    [DemandsInitialization(
        "Determines the systems behaviour when an extraction query returns 0 rows.  Default (false) is that an error is reported.  If set to true (ticked) then instead a DataTable with 0 rows but all the correct headers will be generated usually resulting in a headers only 0 line/empty extract file")]
    public bool AllowEmptyExtractions { get; set; }

    [DemandsInitialization(
        "Batch size, number of records to read from source before releasing it into the extraction pipeline",
        DefaultValue = 10000, Mandatory = true)]
    public int BatchSize { get; set; }

    [DemandsInitialization(
        "In seconds. Overrides the global timeout for SQL query execution. Use 0 for infinite timeout.",
        DefaultValue = 50000, Mandatory = true)]
    public int ExecutionTimeout { get; set; }

    [DemandsInitialization(@"Determines how the system achieves DISTINCT on extraction.  These include:
None - Do not DISTINCT the records, can result in duplication in your extract (not recommended)
SqlDistinct - Adds the DISTINCT keyword to the SELECT sql sent to the server
OrderByAndDistinctInMemory - Adds an ORDER BY statement to the query and applies the DISTINCT in memory as records are read from the server (this can help when extracting very large data sets where DISTINCT keyword blocks record streaming until all records are ready to go)"
        , DefaultValue = DistinctStrategy.SqlDistinct)]
    public DistinctStrategy DistinctStrategy { get; set; }

    [DemandsInitialization("When DBMS is SqlServer then HASH JOIN should be used instead of regular JOINs")]
    public bool UseHashJoins { get; set; }

    [DemandsInitialization(
        "When DBMS is SqlServer and the extraction is for any of these datasets then HASH JOIN should be used instead of regular JOINs")]
    public Catalogue[] UseHashJoinsForCatalogues { get; set; }

    [DemandsInitialization(
        "Exclusion list.  A collection of Catalogues which will never be considered for HASH JOIN even when UseHashJoins is enabled.  Being on this list takes precedence for a Catalogue even if it is on UseHashJoinsForCatalogues.")]
    public Catalogue[] DoNotUseHashJoinsForCatalogues { get; set; }

    /// <summary>
    /// This is a dictionary containing all the CatalogueItems used in the query, the underlying datatype in the origin database and the
    /// actual datatype that was output after the transform operation e.g. a varchar(10) could be converted into a bona fide DateTime which
    /// would be an sql Date.  Finally a recommended SqlDbType is passed back.
    /// </summary>
    public Dictionary<ExtractableColumn, ExtractTimeTransformationObserved> ExtractTimeTransformationsObserved;

    private DbDataCommandDataFlowSource _hostedSource;

    protected virtual void Initialize(ExtractDatasetCommand request)
    {
        Request = request;

        if (request == ExtractDatasetCommand.EmptyCommand)
            return;

        _timeSpentValidating = new Stopwatch();
        _timeSpentCalculatingDISTINCT = new Stopwatch();
        _timeSpentBuckettingDates = new Stopwatch();

        Request.ColumnsToExtract.Sort(); //ensure they are in the right order so we can record the release identifiers

        //if we don't have a cached builder already, build one
        if (request.QueryBuilder == null)
            request.GenerateQueryBuilder();

        foreach (var substitution in Request.ReleaseIdentifierSubstitutions)
            _extractionIdentifiersidx.Add(substitution.GetRuntimeName());

        UniqueReleaseIdentifiersEncountered = new HashSet<object>();

        _catalogue = request.Catalogue;

        if (!string.IsNullOrWhiteSpace(_catalogue.ValidatorXML))
            ExtractionTimeValidator = new ExtractionTimeValidator(_catalogue, request.ColumnsToExtract);

        //if there is a time periodicity ExtractionInformation (AND! it is among the columns the user selected to be extracted)
        if (_catalogue.TimeCoverage_ExtractionInformation_ID != null && request.ColumnsToExtract
                .Cast<ExtractableColumn>().Any(c =>
                    c.CatalogueExtractionInformation_ID == _catalogue.TimeCoverage_ExtractionInformation_ID))
            ExtractionTimeTimeCoverageAggregator =
                new ExtractionTimeTimeCoverageAggregator(_catalogue, request.ExtractableCohort);
        else
            ExtractionTimeTimeCoverageAggregator = null;
    }

    private void Initialize(ExtractGlobalsCommand request)
    {
        GlobalsRequest = request;
    }

    public bool WasCancelled => _cancel;

    private Stopwatch _timeSpentValidating;
    private int _rowsValidated;

    private Stopwatch _timeSpentCalculatingDISTINCT;
    private Stopwatch _timeSpentBuckettingDates;
    private int _rowsBucketted;

    private bool firstChunk = true;
    private bool firstGlobalChunk = true;
    private int _rowsRead;

    private RowPeeker _peeker = new();

    public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
    {
        // we are in the Global Commands case, let's return an empty DataTable (not null)
        // so we can trigger the destination to extract the globals docs and sql
        if (GlobalsRequest != null)
        {
            GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);
            if (firstGlobalChunk)
            {
                //unless we are checking, start auditing
                StartAuditGlobals();

                firstGlobalChunk = false;
                return new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME);
            }

            return null;
        }

        if (Request == null)
            throw new Exception("Component has not been initialized before being asked to GetChunk(s)");

        Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

        if (_cancel)
            throw new Exception("User cancelled data extraction");

        if (_hostedSource == null)
        {
            StartAudit(Request.QueryBuilder.SQL);

            if (Request.DatasetBundle.DataSet.DisableExtraction)
                throw new Exception(
                    $"Cannot extract {Request.DatasetBundle.DataSet} because DisableExtraction is set to true");

            _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
                $"ExecuteDatasetExtraction {Request.DatasetBundle.DataSet}",
                Request.GetDistinctLiveDatabaseServer().Builder,
                ExecutionTimeout)
            {
                // If we are running in batches then always allow empty extractions
                AllowEmptyResultSets = AllowEmptyExtractions || Request.IsBatchResume,
                BatchSize = BatchSize
            };
        }

        DataTable chunk = null;

        try
        {
            chunk = _hostedSource.GetChunk(listener, cancellationToken);

            chunk = _peeker.AddPeekedRowsIfAny(chunk);

            if (Request != null && Request.DatasetBundle.DataSet is not null && chunk is not null)
                chunk.TableName = $"{Request.DatasetBundle.DataSet}";

            //if we are trying to distinct the records in memory based on release id
            if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
            {
                var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

                if (chunk is { Rows.Count: > 0 })
                {
                    //last release id in the current chunk
                    var lastReleaseId = chunk.Rows[^1][releaseIdentifierColumn];

                    //keep reading rows until the release id changes, so a subject never straddles two batches
                    _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                    chunk = MakeDistinct(chunk, listener, cancellationToken);
                }
            }
        }
        catch (AggregateException a)
        {
            if (a.GetExceptionIfExists<TaskCanceledException>() != null)
                _cancel = true;

            throw;
        }
        catch (Exception e)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
        }

        if (cancellationToken.IsCancellationRequested)
            throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");

        //if the first chunk is null
        if (firstChunk && chunk == null && !AllowEmptyExtractions)
            throw new Exception(
                $"There is no data to load, query returned no rows, query was:{Environment.NewLine}{_hostedSource.Sql ?? Request.QueryBuilder.SQL}");

        //not the first chunk anymore
        firstChunk = false;

        //data exhausted
        if (chunk == null)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
                $"Data exhausted after reading {_rowsRead} rows of data ({UniqueReleaseIdentifiersEncountered.Count} unique release identifiers seen)"));
            if (Request != null)
                Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered =
                    Request.IsBatchResume ? -1 : UniqueReleaseIdentifiersEncountered.Count;
            return null;
        }

        _rowsRead += chunk.Rows.Count;

        //chunk will have datatypes for all the things in the buffer so we can populate our dictionary of facts about what columns/catalogue items have spontaneously changed name/type etc
        if (ExtractTimeTransformationsObserved == null)
            GenerateExtractionTransformObservations(chunk);

        //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
        var includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;

        _timeSpentValidating.Start();
        //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data, just some metadata that goes into a flat file
        if (ExtractionTimeValidator != null && Request.IncludeValidation)
            try
            {
                chunk.Columns.Add(ValidationColumnName);

                ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

                _rowsValidated += chunk.Rows.Count;
                listener.OnProgress(this,
                    new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records),
                        _timeSpentValidating.Elapsed));
            }
            catch (Exception ex)
            {
                listener.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
                ValidationFailureException = ex;
                ExtractionTimeValidator = null;
            }

        _timeSpentValidating.Stop();

        _timeSpentBuckettingDates.Start();
        if (ExtractionTimeTimeCoverageAggregator != null)
        {
            _rowsBucketted += chunk.Rows.Count;

            foreach (DataRow row in chunk.Rows)
                ExtractionTimeTimeCoverageAggregator.ProcessRow(row);

            listener.OnProgress(this,
                new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records),
                    _timeSpentBuckettingDates.Elapsed));
        }

        _timeSpentBuckettingDates.Stop();

        _timeSpentCalculatingDISTINCT.Start();
        //record unique release identifiers found
        if (includesReleaseIdentifier)
            foreach (var idx in _extractionIdentifiersidx)
            {
                foreach (DataRow r in chunk.Rows)
                {
                    if (r[idx] == DBNull.Value)
                        if (_extractionIdentifiersidx.Count == 1)
                            throw new Exception(
                                $"Null release identifier found in extract of dataset {Request.DatasetBundle.DataSet}");
                        else
                            continue; //there are multiple extraction identifiers, that's fine if one or two are null

                    UniqueReleaseIdentifiersEncountered.Add(r[idx]);
                }

                listener.OnProgress(this,
                    new ProgressEventArgs("Calculating Distinct Release Identifiers",
                        new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records),
                        _timeSpentCalculatingDISTINCT.Elapsed));
            }

        _timeSpentCalculatingDISTINCT.Stop();

        return chunk;
    }

    /// <summary>
    /// Makes the current batch ONLY distinct.  This only works if you have a bounded batch (see OrderByAndDistinctInMemory)
    /// </summary>
    /// <param name="chunk"></param>
    /// <param name="listener"></param>
    /// <param name="cancellationToken"></param>
    /// <returns></returns>
    private static DataTable MakeDistinct(DataTable chunk, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        var removeDuplicates = new RemoveDuplicates { NoLogging = true };
        return removeDuplicates.ProcessPipelineData(chunk, listener, cancellationToken);
    }

    private void GenerateExtractionTransformObservations(DataTable chunk)
    {
        ExtractTimeTransformationsObserved = new Dictionary<ExtractableColumn, ExtractTimeTransformationObserved>();

        //create the Types dictionary
        foreach (ExtractableColumn column in Request.ColumnsToExtract)
        {
            ExtractTimeTransformationsObserved.Add(column, new ExtractTimeTransformationObserved());

            //record catalogue information about what it is supposed to be.
            if (!column.HasOriginalExtractionInformationVanished())
            {
                var extractionInformation = column.CatalogueExtractionInformation;

                //what the catalogue says it is
                ExtractTimeTransformationsObserved[column].DataTypeInCatalogue =
                    extractionInformation.ColumnInfo.Data_type;
                ExtractTimeTransformationsObserved[column].CatalogueItem = extractionInformation.CatalogueItem;

                //what it actually is
                if (chunk.Columns.Contains(column.GetRuntimeName()))
                {
                    ExtractTimeTransformationsObserved[column].FoundAtExtractTime = true;
                    ExtractTimeTransformationsObserved[column].DataTypeObservedInRuntimeBuffer =
                        chunk.Columns[column.GetRuntimeName()].DataType;
                }
                else
                {
                    ExtractTimeTransformationsObserved[column].FoundAtExtractTime = false;
                }
            }
        }
    }

    private string GetCommandSQL(IDataLoadEventListener listener)
    {
        //if the user wants some custom logic for removing identical duplicates
        switch (DistinctStrategy)
        {
            //user doesn't care about identical duplicates
            case DistinctStrategy.None:
                ((QueryBuilder)Request.QueryBuilder).SetLimitationSQL("");
                break;

            //system default behaviour
            case DistinctStrategy.SqlDistinct:
                break;

            //user wants to order by the release ID and resolve duplicates in batches as they are read
            case DistinctStrategy.OrderByAndDistinctInMemory:

                //remove the DISTINCT keyword from the query
                ((QueryBuilder)Request.QueryBuilder).SetLimitationSQL("");

                //find the release identifier substitution (e.g. chi for PROCHI)
                var substitution = Request.ReleaseIdentifierSubstitutions.First();

                //add a line at the end of the query to ORDER BY the ReleaseId column (e.g. PROCHI)
                var orderBySql = $"ORDER BY {substitution.SelectSQL}";

                // don't add the line if it is already there (e.g. because of Retry)
                if (!Request.QueryBuilder.CustomLines.Any(l => string.Equals(l.Text, orderBySql)))
                    Request.QueryBuilder.AddCustomLine(orderBySql, QueryComponent.Postfix);

                break;
            default:
                throw new ArgumentOutOfRangeException();
        }

        var sql = Request.QueryBuilder.SQL;

        sql = HackExtractionSQL(sql, listener);

        if (ShouldUseHashedJoins())
        {
            //use hash joins!
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Information, "Substituting JOIN for HASH JOIN"));
            sql = sql.Replace(" JOIN ", " HASH JOIN ");
        }

        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            $"/*Decided on extraction SQL:*/{Environment.NewLine}{sql}"));

        return sql;
    }

    private bool ShouldUseHashedJoins()
    {
        var dbms = Request?.QueryBuilder?.QuerySyntaxHelper?.DatabaseType;

        //must be sql server
        if (dbms == null || dbms.Value != DatabaseType.MicrosoftSQLServer)
            return false;

        // is this Catalogue explicitly marked as never hash join? i.e. it's on the exclusion list
        if (DoNotUseHashJoinsForCatalogues?.Contains(Request.Catalogue) ?? false)
            return false;

        if (UseHashJoins)
            return true;

        if (UseHashJoinsForCatalogues != null)
            return UseHashJoinsForCatalogues.Contains(Request.Catalogue);

        //user doesn't want to use hash joins
        return false;
    }

    public virtual string HackExtractionSQL(string sql, IDataLoadEventListener listener) => sql;

    private void StartAudit(string sql)
    {
        var dataExportRepo = Request.DataExportRepository;

        var previousAudit = dataExportRepo
            .GetAllCumulativeExtractionResultsFor(Request.Configuration, Request.DatasetBundle.DataSet).ToArray();

        if (Request.IsBatchResume)
        {
            var match =
                previousAudit.FirstOrDefault(a => a.ExtractableDataSet_ID == Request.DatasetBundle.DataSet.ID) ??
                throw new Exception(
                    $"Could not find previous CumulativeExtractionResults for dataset {Request.DatasetBundle.DataSet} despite the Request being marked as a batch resume");
            Request.CumulativeExtractionResults = match;
        }
        else
        {
            //delete old audit records
            foreach (var audit in previousAudit)
                audit.DeleteInDatabase();

            var extractionResults = new CumulativeExtractionResults(dataExportRepo, Request.Configuration,
                Request.DatasetBundle.DataSet, sql);

            var filterDescriptions =
                RecursivelyListAllFilterNames(
                    Request.Configuration.GetFilterContainerFor(Request.DatasetBundle.DataSet));

            extractionResults.FiltersUsed = filterDescriptions.TrimEnd(',');
            extractionResults.SaveToDatabase();

            Request.CumulativeExtractionResults = extractionResults;
        }
    }

    private void StartAuditGlobals()
    {
        var repo = GlobalsRequest.RepositoryLocator.DataExportRepository;

        var previousAudit = repo
            .GetAllObjectsWhere<SupplementalExtractionResults>("ExtractionConfiguration_ID",
                GlobalsRequest.Configuration.ID)
            .Where(c => c.CumulativeExtractionResults_ID == null);

        //delete old audit records
        foreach (var audit in previousAudit)
            audit.DeleteInDatabase();
    }

    private string RecursivelyListAllFilterNames(IContainer filterContainer)
    {
        if (filterContainer == null)
            return "";

        var toReturn = "";

        if (filterContainer.GetSubContainers() != null)
            foreach (var subContainer in filterContainer.GetSubContainers())
                toReturn += RecursivelyListAllFilterNames(subContainer);

        if (filterContainer.GetFilters() != null)
            foreach (var f in filterContainer.GetFilters())
                toReturn += $"{f.Name},";

        return toReturn;
    }

    public virtual void Dispose(IDataLoadEventListener job, Exception pipelineFailureExceptionIfAny)
    {
    }

    public void Abort(IDataLoadEventListener listener)
    {
    }

    public virtual DataTable TryGetPreview()
    {
        if (Request == ExtractDatasetCommand.EmptyCommand)
            return new DataTable();

        var toReturn = new DataTable();
        toReturn.BeginLoadData();
        var server = _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, false);

        using var con = server.GetConnection();
        con.Open();

        var da = server.GetDataAdapter(Request.QueryBuilder.SQL, con);

        //get up to 1000 records
        da.Fill(0, 1000, toReturn);
        toReturn.EndLoadData();

        con.Close();

        return toReturn;
    }

    public void PreInitialize(IExtractCommand value, IDataLoadEventListener listener)
    {
        if (value is ExtractDatasetCommand datasetCommand)
            Initialize(datasetCommand);
        if (value is ExtractGlobalsCommand command)
            Initialize(command);
    }

    public virtual void Check(ICheckNotifier notifier)
    {
        if (Request == ExtractDatasetCommand.EmptyCommand)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "Request is ExtractDatasetCommand.EmptyCommand, checking will not be carried out",
                CheckResult.Warning));
            return;
        }

        if (GlobalsRequest != null)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "Request is for Globals, checking will not be carried out at source", CheckResult.Success));
            return;
        }

        if (Request == null)
        {
            notifier.OnCheckPerformed(new CheckEventArgs("ExtractionRequest has not been set", CheckResult.Fail));
            return;
        }
    }
}
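
For orientation, here is how a consumer typically drives this source: GetChunk hands back batches of up to BatchSize rows and returns null once the data is exhausted (or, for globals, after one empty marker table). A minimal sketch — the command, listener, token and destination objects are assumed to come from the surrounding pipeline; this is illustrative, not RDMP's actual engine code:

// Illustrative driver loop (assumed harness; not RDMP's real pipeline engine).
var source = new ExecuteDatasetExtractionSource
{
    BatchSize = 10000,        // rows per chunk released into the pipeline
    ExecutionTimeout = 50000, // seconds, matching the DemandsInitialization default
    DistinctStrategy = DistinctStrategy.SqlDistinct
};

source.PreInitialize(command, listener); // command: an IExtractCommand built elsewhere

DataTable chunk;
while ((chunk = source.GetChunk(listener, token)) != null)
{
    // each non-null chunk is one batch; null signals the data is exhausted
    destination.ProcessPipelineData(chunk, listener, token);
}

source.Dispose(listener, null);

Note the design choice visible in GetChunk: for the globals case it returns an empty DataTable rather than null on the first call, which is what lets a destination distinguish "extract the global documents now" from "no more data".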