• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 6237307473

19 Sep 2023 04:02PM UTC coverage: 57.015% (-0.4%) from 57.44%
6237307473

push

github

web-flow
Feature/rc4 (#1570)

* Syntax tidying
* Dependency updates
* Event handling singletons (ThrowImmediately and co)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: James A Sutherland <>
Co-authored-by: James Friel <jfriel001@dundee.ac.uk>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

10734 of 20259 branches covered (0.0%)

Branch coverage included in aggregate %.

5922 of 5922 new or added lines in 565 files covered. (100.0%)

30687 of 52390 relevant lines covered (58.57%)

7361.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.0
/Rdmp.Core/DataLoad/Modules/DataFlowOperations/Swapping/ColumnSwapper.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System;
8
using System.Collections.Generic;
9
using System.Data;
10
using System.Globalization;
11
using System.Linq;
12
using Rdmp.Core.CohortCommitting.Pipeline;
13
using Rdmp.Core.Curation.Data;
14
using Rdmp.Core.Curation.Data.Spontaneous;
15
using Rdmp.Core.DataExport.Data;
16
using Rdmp.Core.DataExport.DataExtraction.Commands;
17
using Rdmp.Core.DataFlowPipeline;
18
using Rdmp.Core.DataFlowPipeline.Requirements;
19
using Rdmp.Core.DataLoad.Modules.DataFlowOperations.Aliases;
20
using Rdmp.Core.DataLoad.Modules.DataFlowOperations.Aliases.Exceptions;
21
using Rdmp.Core.QueryBuilding;
22
using Rdmp.Core.Repositories;
23
using Rdmp.Core.ReusableLibraryCode.Checks;
24
using Rdmp.Core.ReusableLibraryCode.DataAccess;
25
using Rdmp.Core.ReusableLibraryCode.Progress;
26
using TypeGuesser;
27

28
namespace Rdmp.Core.DataLoad.Modules.DataFlowOperations.Swapping;
29

30
/// <summary>
31
/// Swaps values stored in a given column for values found in a mapping table (e.g. swap ReleaseID for PrivateID)
32
/// </summary>
33
internal class ColumnSwapper : IPluginDataFlowComponent<DataTable>, IPipelineOptionalRequirement<IExtractCommand>,
34
    IPipelineOptionalRequirement<ICohortCreationRequest>
35
{
36
    [DemandsInitialization(
37
        "The column in your pipeline containing input values you want swapped.  Leave null to use the same name as the MappingFromColumn")]
38
    public string InputFromColumn { get; set; }
50✔
39

40
    [DemandsInitialization(
41
        "Name for the column you want to create in the output stream of this component containing the mapped values.  Leave null to use the same name as the MappingToColumn")]
42
    public string OutputToColumn { get; set; }
50✔
43

44
    [DemandsInitialization("The column in your database which stores the input values you want mapped",
45
        Mandatory = true)]
46
    public ColumnInfo MappingFromColumn { get; set; }
236✔
47

48
    [DemandsInitialization("The column in your database which stores the output values you want emitted",
49
        Mandatory = true)]
50
    public ColumnInfo MappingToColumn { get; set; }
210✔
51

52
    [DemandsInitialization(@"Optional text to add when generating the mapping table. Should not start with WHERE.
53

54
If Pipeline execution environment contains a Project then the following replacements are available:
55
    $p - Project Name ('e.g. My Project')
56
    $n - Project Number (e.g. 234)
57
    $t - Master Ticket (e.g. 'LINK-1234')
58

59
If Pipeline execution environment contains an ExtractionConfiguration then the following additional replacements are available:
60
    $r - Request Ticket (e.g. 'LINK-1234')
61
    $l - Release Ticket (e.g. 'LINK-1234')", DemandType = DemandType.SQL, ContextText = "WHERE")]
62
    public virtual string WHERELogic { get; set; }
114✔
63

64
    [DemandsInitialization("Determines behaviour when the same input value maps to multiple output values",
65
        DefaultValue = AliasResolutionStrategy.CrashIfAliasesFound)]
66
    public AliasResolutionStrategy AliasResolutionStrategy { get; set; }
8✔
67

68
    [DemandsInitialization(@"Determines behaviour when no mapping is found for an input value:
69
True - Crash the load
70
False - Drop the row from the DataTable (and issue a warning)", DefaultValue = true)]
71
    public bool CrashIfNoMappingsFound { get; set; }
8✔
72

73
    [DemandsInitialization("Timeout to set on fetching the mapping table", DefaultValue = 30)]
74
    public int Timeout { get; set; }
30✔
75

76
    [DemandsInitialization(
77
        @"Setting this to true will leave the original input column in your DataTable (so your table will have both input and output columns instead of a substitution)",
78
        DefaultValue = true)]
79
    public bool KeepInputColumnToo { get; set; }
34✔
80

81
    private CultureInfo _culture;
82

83
    /// <summary>
    /// Culture handed to the type decider factory when input values have to be converted to the mapping table key Type.
    /// Reads as <see cref="CultureInfo.CurrentCulture"/> until a value has been assigned.
    /// </summary>
    [DemandsInitialization("The culture to use e.g. when Type translations are required")]
    public CultureInfo Culture
    {
        get
        {
            // no explicit culture configured, fall back to the ambient one
            if (_culture == null)
                return CultureInfo.CurrentCulture;

            return _culture;
        }
        set
        {
            _culture = value;
        }
    }
89

90
    private Dictionary<object, List<object>> _mappingTable;
91

92
    /// <summary>
93
    /// The Type of objects that are stored in the Keys of <see cref="_mappingTable"/>.  For use when input types do not match the mapping table types
94
    /// </summary>
95
    private Type _keyType;
96

97

98
    protected IProject _project;
99
    protected IExtractionConfiguration _configuration;
100

101
    /// <summary>
    /// Looks up every value of the input column of <paramref name="toProcess"/> in the mapping table and writes the
    /// mapped value(s) to the output column.  The mapping table is built on the first call and reused afterwards.
    /// </summary>
    /// <param name="toProcess">The batch to swap values in.  It is modified in place and returned</param>
    /// <param name="listener">Receives progress messages and warnings (type conversion, dropped rows)</param>
    /// <param name="cancellationToken">Not consulted by this method</param>
    /// <returns>The same <see cref="DataTable"/> instance that was passed in</returns>
    /// <exception cref="KeyNotFoundException">An input value has no mapping and <see cref="CrashIfNoMappingsFound"/> is true</exception>
    /// <exception cref="AliasException">An input value maps to several outputs and the strategy is <see cref="AliasResolutionStrategy.CrashIfAliasesFound"/></exception>
    /// <exception cref="Exception">The input column is missing, the output column already exists, or the mapping table is empty / has no usable key Type</exception>
    public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        var fromColumnName = string.IsNullOrWhiteSpace(InputFromColumn)
            ? MappingFromColumn.GetRuntimeName()
            : InputFromColumn;
        var toColumnName = string.IsNullOrWhiteSpace(OutputToColumn)
            ? MappingToColumn.GetRuntimeName()
            : OutputToColumn;

        //when both names are the same the mapped value overwrites the input value instead of going into a new column
        var inPlace = string.Equals(fromColumnName, toColumnName);

        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to build mapping table"));

        if (!toProcess.Columns.Contains(fromColumnName))
            throw new Exception($"DataTable did not contain a field called '{fromColumnName}'");

        if (!inPlace && toProcess.Columns.Contains(toColumnName))
            throw new Exception($"DataTable already contained a field '{toColumnName}'");

        //only fetch the mapping table once, later batches reuse it
        if (_mappingTable == null)
            BuildMappingTable(listener);

        if (_mappingTable.Count == 0)
            throw new Exception("Mapping table was empty");

        if (_keyType == null)
            throw new Exception("Unable to determine key datatype for mapping table");

        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            $"Mapping table resulted in {_mappingTable.Count} unique possible input values"));
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            $"Mapping table resulted in {_mappingTable.Sum(kvp => kvp.Value.Count)} unique possible output values"));
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            $"Mapping table Key is of Type {_keyType}"));

        //add the new column (the output column).  Unless we are just updating the same input column
        if (!inPlace)
            toProcess.Columns.Add(toColumnName);

        var idxFrom = toProcess.Columns.IndexOf(fromColumnName);
        var idxTo = toProcess.Columns.IndexOf(toColumnName);

        var numberOfElementsPerRow = toProcess.Columns.Count;

        var newRows = new List<object[]>();
        var toDrop = new List<DataRow>();

        // Converts an input value to the mapping table key Type.  Stays null when no conversion is required
        Func<object, object> typeConversion = null;

        //if there is a difference between the input column datatype and the mapping table datatype
        if (toProcess.Columns[idxFrom].DataType != _keyType)
        {
            //tell the user
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Warning,
                    $"Input DataTable column {fromColumnName} is of data type {toProcess.Columns[idxFrom].DataType}, this differs from mapping table which is {_keyType}.  Type conversion will take place between these two Types when performing lookup"));

            //work out a suitable anonymous method for converting between the Types
            if (_keyType == typeof(string))
            {
                typeConversion = a => a.ToString();
            }
            else
            {
                try
                {
                    var decider = new TypeDeciderFactory(Culture).Create(_keyType);
                    typeConversion = a => decider.Parse(a.ToString());
                }
                catch (Exception ex)
                {
                    throw new Exception(
                        $"Error building Type conversion decider for the mapping table key type {_keyType}", ex);
                }
            }
        }

        foreach (DataRow row in toProcess.Rows)
        {
            var fromValue = row[idxFrom];

            //ignore null inputs, pass them straight through
            if (fromValue == DBNull.Value)
            {
                row[idxTo] = DBNull.Value;
                continue;
            }

            // convert the input value to the mapping table key Type (when the Types differ)
            if (typeConversion != null)
                fromValue = typeConversion(fromValue);

            // Single lookup instead of ContainsKey followed by the indexer.  A null key makes Dictionary lookups
            // throw ArgumentNullException, so a converted value of null is handled as 'no mapping'.
            // NOTE(review): presumably the decider can return null for blank input - confirm against TypeGuesser
            if (fromValue == null || !_mappingTable.TryGetValue(fromValue, out var results))
            {
                if (CrashIfNoMappingsFound)
                    throw new KeyNotFoundException($"Could not find mapping for {fromValue}");

                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                    $"No mapping for '{fromValue}' dropping row"));
                toDrop.Add(row);
                continue;
            }

            //exactly one mapping, the simple case
            if (results.Count == 1)
            {
                row[idxTo] = results[0];
                continue;
            }

            //we have multiple mappings e.g. bob=>Frank and bob=>Jesus.  What does the user want to do about that
            switch (AliasResolutionStrategy)
            {
                case AliasResolutionStrategy.CrashIfAliasesFound:
                    throw new AliasException(
                        $"The value '{fromValue}' maps to multiple output values:{string.Join(",", results.Select(v => $"'{v}'"))}");

                case AliasResolutionStrategy.MultiplyInputDataRowsByAliases:

                    //substitute for the first alias (bob=>Frank)
                    row[idxTo] = results[0];

                    //then clone the row once per remaining alias (bob=>Jesus)
                    foreach (var next in results.Skip(1))
                    {
                        //Create a copy of the input row
                        var newRow = new object[numberOfElementsPerRow];
                        row.ItemArray.CopyTo(newRow, 0);

                        //Set the aliasable element to the alias
                        newRow[idxTo] = next;

                        //Rows cannot be added while toProcess.Rows is being enumerated so queue it up
                        newRows.Add(newRow);
                    }

                    break;
                default:
                    throw new ArgumentOutOfRangeException();
            }
        }

        //add any alias multiplication rows
        foreach (var newRow in newRows)
            toProcess.Rows.Add(newRow);

        //drop rows with missing identifiers
        foreach (var dropRow in toDrop)
            toProcess.Rows.Remove(dropRow);

        // drop column unless it is an inplace (no new columns) update or user wants to keep both
        if (!inPlace && !KeepInputColumnToo)
            toProcess.Columns.Remove(fromColumnName);

        return toProcess;
    }
262

263
    /// <summary>
    /// Runs the mapping query (see <see cref="GetMappingTableSql"/>) and populates <see cref="_mappingTable"/> and
    /// <see cref="_keyType"/> from the results.  Rows with a null key are skipped and counted.
    /// </summary>
    /// <param name="listener">Receives a warning when null keys were discarded</param>
    /// <exception cref="Exception">The non null key values read from the database are not all of the same Type</exception>
    private void BuildMappingTable(IDataLoadEventListener listener)
    {
        // Build into locals and only publish to the fields once the whole read has succeeded.  Otherwise a
        // failure part way through would leave a partial (non null) table that later calls would silently reuse
        var mappingTable = new Dictionary<object, List<object>>();
        Type keyType = null;

        //connect to server and run distinct query
        var server = MappingFromColumn.TableInfo.Discover(DataAccessContext.DataLoad).Database.Server;

        var fromColumnName = MappingFromColumn.GetRuntimeName();
        var toColumnName = MappingToColumn.GetRuntimeName();

        // The number of null key values found in the mapping table (these are ignored)
        var nulls = 0;

        //pull back all the data
        using (var con = server.GetConnection())
        {
            con.Open();
            var sql = GetMappingTableSql();

            using var cmd = server.GetCommand(sql, con);
            cmd.CommandTimeout = Timeout;

            using var r = cmd.ExecuteReader();
            while (r.Read())
            {
                var keyVal = r[fromColumnName];

                if (keyVal == DBNull.Value)
                {
                    nulls++;
                    continue;
                }

                //the first non null key decides the key Type, every later key must agree with it
                var currentType = keyVal.GetType();

                if (keyType == null)
                    keyType = currentType;
                else if (keyType != currentType)
                    throw new Exception(
                        $"Database mapping table Keys were of mixed Types {keyType} and {currentType}");

                //one lookup per row instead of ContainsKey + Add + indexer
                if (!mappingTable.TryGetValue(keyVal, out var outputs))
                {
                    outputs = new List<object>();
                    mappingTable.Add(keyVal, outputs);
                }

                outputs.Add(r[toColumnName]);
            }
        }

        _mappingTable = mappingTable;
        _keyType = keyType;

        if (nulls > 0)
            listener.OnNotify(this,
                new NotifyEventArgs(ProgressEventType.Warning,
                    $"Discarded {nulls} Null key values read from mapping table"));
    }
30✔
322

323
    /// <summary>
    /// Builds the DISTINCT query that selects <see cref="MappingFromColumn"/> and <see cref="MappingToColumn"/>,
    /// filtered by <see cref="WHERELogic"/> when that is set, and then applies the $ token replacements.
    /// </summary>
    /// <returns>The SQL text after token replacement</returns>
    protected virtual string GetMappingTableSql()
    {
        //throwaway repository that the spontaneous query objects below are created against
        var memoryRepository = new MemoryCatalogueRepository();

        var builder = new QueryBuilder("DISTINCT", null, null);

        //key column first, then the value column
        foreach (var column in new[] { MappingFromColumn, MappingToColumn })
            builder.AddColumn(new ColumnInfoToIColumn(memoryRepository, column));

        //no filter configured, the plain two column query is all we need
        if (string.IsNullOrWhiteSpace(WHERELogic))
            return AdjustForProjectTokens(builder.SQL);

        //wrap the user supplied logic in a single filter inside an AND container
        var root = new SpontaneouslyInventedFilterContainer(memoryRepository, null, null,
            FilterContainerOperation.AND);
        var userFilter = new SpontaneouslyInventedFilter(memoryRepository, root, WHERELogic, "WHERELogic", null, null);
        root.AddChild(userFilter);

        builder.RootFilterContainer = root;

        return AdjustForProjectTokens(builder.SQL);
    }
342

343
    /// <summary>
    /// Replaces the tokens $p, $n, $t (taken from <see cref="_project"/>) and $r, $l (taken from
    /// <see cref="_configuration"/>) in <paramref name="mappingTableSql"/>.  Tokens are processed in that order and
    /// only tokens that actually occur in the SQL require their source object / value to be present.
    /// </summary>
    /// <param name="mappingTableSql">SQL that may contain replacement tokens</param>
    /// <returns>The SQL with every token that was present substituted</returns>
    /// <exception cref="Exception">A token is used but the Project / ExtractionConfiguration or the value it refers to is missing</exception>
    private string AdjustForProjectTokens(string mappingTableSql)
    {
        var sql = mappingTableSql;

        // $p - Project Name
        if (sql.Contains("$p"))
        {
            var project = _project ??
                          throw new Exception("You cannot use $p in contexts where there is no Project available");
            var projectName = project.Name?.ToString() ??
                              throw new Exception("Project didn't have a Project Name");

            sql = sql.Replace("$p", projectName);
        }

        // $n - Project Number
        if (sql.Contains("$n"))
        {
            var project = _project ??
                          throw new Exception("You cannot use $n in contexts where there is no Project available");
            var projectNumber = project.ProjectNumber?.ToString() ??
                                throw new Exception($"Project '{project.Name}' didn't have a Project Number");

            sql = sql.Replace("$n", projectNumber);
        }

        // $t - Master Ticket
        if (sql.Contains("$t"))
        {
            var project = _project ??
                          throw new Exception("You cannot use $t in contexts where there is no Project available");
            var masterTicket = project.MasterTicket?.ToString() ??
                               throw new Exception($"Project '{project.Name}' didn't have a Master Ticket");

            sql = sql.Replace("$t", masterTicket);
        }

        // $r - Request Ticket
        if (sql.Contains("$r"))
        {
            var configuration = _configuration ??
                                throw new Exception(
                                    "You cannot use $r in contexts where there is no ExtractionConfiguration available");
            var requestTicket = configuration.RequestTicket?.ToString() ??
                                throw new Exception(
                                    $"Extraction Configuration '{configuration.Name}' didn't have a Request Ticket");

            sql = sql.Replace("$r", requestTicket);
        }

        // $l - Release Ticket
        if (sql.Contains("$l"))
        {
            var configuration = _configuration ??
                                throw new Exception(
                                    "You cannot use $l in contexts where there is no ExtractionConfiguration available");
            var releaseTicket = configuration.ReleaseTicket?.ToString() ??
                                throw new Exception(
                                    $"Extraction Configuration '{configuration.Name}' didn't have a Release Ticket");

            sql = sql.Replace("$l", releaseTicket);
        }

        return sql;
    }
399

400
    /// <summary>
    /// Empties and releases the cached mapping table.  Neither parameter is used.
    /// </summary>
    /// <param name="listener">Unused</param>
    /// <param name="pipelineFailureExceptionIfAny">Unused</param>
    public void Dispose(IDataLoadEventListener listener, Exception pipelineFailureExceptionIfAny)
    {
        //free up memory, a null table is what triggers a rebuild in ProcessPipelineData
        _mappingTable?.Clear();
        _mappingTable = null;
    }
×
409

410
    /// <summary>
    /// Abort notification.  The body is empty, this component does no work here.
    /// </summary>
    /// <param name="listener">Unused</param>
    public void Abort(IDataLoadEventListener listener)
    {
        //nothing to do
    }
×
413

414
    /// <summary>
    /// Validates the configuration of this component and reports the mapping table SQL to <paramref name="notifier"/>.
    /// </summary>
    /// <param name="notifier">Receives a success message containing the generated mapping table SQL</param>
    /// <exception cref="Exception">
    /// <see cref="WHERELogic"/> begins with the WHERE keyword, a mapping column is missing, or the two mapping
    /// columns belong to different tables
    /// </exception>
    public virtual void Check(ICheckNotifier notifier)
    {
        if (!string.IsNullOrWhiteSpace(WHERELogic))
        {
            const string keyword = "WHERE";

            // SQL keywords are case insensitive and leading whitespace is irrelevant, so ' where x=1' is just as
            // wrong as 'WHERE x=1'.  Ordinal comparison keeps the check independent of the current culture.
            var trimmed = WHERELogic.TrimStart();

            // Only the keyword itself is rejected, not an identifier that merely begins with those letters
            var startsWithKeyword = trimmed.StartsWith(keyword, StringComparison.OrdinalIgnoreCase) &&
                                    (trimmed.Length == keyword.Length ||
                                     !(char.IsLetterOrDigit(trimmed[keyword.Length]) ||
                                       trimmed[keyword.Length] == '_'));

            if (startsWithKeyword)
                throw new Exception("WHERE logic should not start with WHERE");
        }

        if (MappingFromColumn == null || MappingToColumn == null)
            throw new Exception("Mapping From/To Column missing, these are Mandatory");

        if (MappingFromColumn.TableInfo_ID != MappingToColumn.TableInfo_ID)
            throw new Exception("MappingFromColumn and MappingToColumn must belong to the same table");

        notifier.OnCheckPerformed(new CheckEventArgs(
            $"Mapping table SQL is:{Environment.NewLine}{GetMappingTableSql()}", CheckResult.Success));
    }
30✔
429

430
    /// <summary>
    /// Captures the extraction configuration of <paramref name="value"/> and, when there is a configuration, its
    /// project.  Both are used later for $ token replacement in the mapping table SQL.
    /// </summary>
    /// <param name="value">The extract command supplied by the pipeline</param>
    /// <param name="listener">Unused</param>
    public void PreInitialize(IExtractCommand value, IDataLoadEventListener listener)
    {
        var configuration = value.Configuration;

        //a command without a configuration leaves both fields null
        _project = configuration?.Project;
        _configuration = configuration;
    }
2✔
435

436
    /// <summary>
    /// Captures the project of <paramref name="value"/> for $ token replacement in the mapping table SQL.
    /// The extraction configuration field is not touched.
    /// </summary>
    /// <param name="value">The cohort creation request supplied by the pipeline</param>
    /// <param name="listener">Unused</param>
    public void PreInitialize(ICohortCreationRequest value, IDataLoadEventListener listener) =>
        _project = value.Project;
×
440
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc