• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 6237307473

19 Sep 2023 04:02PM UTC coverage: 57.015% (-0.4%) from 57.44%
6237307473

push

github

web-flow
Feature/rc4 (#1570)

* Syntax tidying
* Dependency updates
* Event handling singletons (ThrowImmediately and co)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: James A Sutherland <>
Co-authored-by: James Friel <jfriel001@dundee.ac.uk>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

10734 of 20259 branches covered (0.0%)

Branch coverage included in aggregate %.

5922 of 5922 new or added lines in 565 files covered. (100.0%)

30687 of 52390 relevant lines covered (58.57%)

7361.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.22
/Rdmp.Core/DataLoad/Modules/DataFlowOperations/Aliases/AliasHandler.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System;
8
using System.Collections.Generic;
9
using System.Data;
10
using System.Linq;
11
using Rdmp.Core.Curation.Data;
12
using Rdmp.Core.DataFlowPipeline;
13
using Rdmp.Core.DataLoad.Modules.DataFlowOperations.Aliases.Exceptions;
14
using Rdmp.Core.ReusableLibraryCode.Checks;
15
using Rdmp.Core.ReusableLibraryCode.DataAccess;
16
using Rdmp.Core.ReusableLibraryCode.Progress;
17

18
namespace Rdmp.Core.DataLoad.Modules.DataFlowOperations.Aliases;
19

20
/// <summary>
/// Pipeline component for resolving the situation where a given unique patient identifier isn't unique (i.e. a person has aliases) by applying an
/// AliasResolutionStrategy (See AliasHandler.docx)
/// </summary>
public class AliasHandler : IPluginDataFlowComponent<DataTable>
{
    /// <summary>
    /// Server on which <see cref="AliasTableSQL"/> is executed to fetch the alias table.
    /// </summary>
    [DemandsInitialization("The server that will be connected to to fetch the alias resolution table",
        Mandatory = true)]
    public ExternalDatabaseServer ServerToExecuteQueryOn { get; set; }

    /// <summary>
    /// Context passed to <c>DataAccessPortal.ExpectServer</c> when connecting to <see cref="ServerToExecuteQueryOn"/>.
    /// </summary>
    [DemandsInitialization(
        "The context under which to connect to the server, if unsure just select DataAccessContext.DataLoad (this only matters if you have encrypted logon credentials configured on a per context level)",
        DemandType.Unspecified, DataAccessContext.DataLoad)]
    public DataAccessContext DataAccessContext { get; set; }

    /// <summary>
    /// Query that produces the alias table: exactly 2 columns, the first named the same as
    /// <see cref="AliasColumnInInputDataTables"/>, the second holding an alias of the first.
    /// </summary>
    [DemandsInitialization(
        "A fully specified SQL Select query to execute on ServerToExecuteQueryOn, this should result in the alias table.  The alias table must have only 2 columns.  The first column must match a column name in the input DataTable.  The second column must contain a known aliases which is different from the first column value.",
        DemandType.SQL, Mandatory = true)]
    public string AliasTableSQL { get; set; }

    /// <summary>
    /// Command timeout (in seconds) applied to <see cref="AliasTableSQL"/> when it is run from
    /// <see cref="ProcessPipelineData"/>.  <see cref="Check"/> uses its own shorter timeout.
    /// </summary>
    [DemandsInitialization("Maximum amount of time in seconds to let the AliasTableSQL execute for before giving up",
        DemandType.Unspecified, 120)]
    public int TimeoutForAssemblingAliasTable { get; set; }

    /// <summary>
    /// What to do when a row in the input data holds a value that has aliases.
    /// </summary>
    [DemandsInitialization("Strategy for dealing with aliases in DataTables")]
    public AliasResolutionStrategy ResolutionStrategy { get; set; }

    /// <summary>
    /// Name of the column (in both the pipeline DataTable and the alias table) that holds the aliasable values.
    /// </summary>
    [DemandsInitialization(
        "The name of the input column name in pipeline data (which must exist!) which contains the aliasable values.  This is probably your patient identifier column.",
        Mandatory = true)]
    public string AliasColumnInInputDataTables { get; set; }

    /// <summary>
    /// Looks up every value of <see cref="AliasColumnInInputDataTables"/> in the alias table and applies
    /// <see cref="ResolutionStrategy"/> to each match.  The alias table is fetched on first use and cached.
    /// </summary>
    /// <param name="toProcess">Batch to process; any additional rows are appended to this same table.</param>
    /// <param name="listener">Receives a Warning when rows were added, otherwise an Information message.</param>
    /// <param name="cancellationToken">Not consulted by this component.</param>
    /// <returns>The same <paramref name="toProcess"/> instance, with one extra row per alias found.</returns>
    /// <exception cref="KeyNotFoundException"><paramref name="toProcess"/> has no column called <see cref="AliasColumnInInputDataTables"/>.</exception>
    /// <exception cref="AliasException">An alias was found and the strategy is <see cref="AliasResolutionStrategy.CrashIfAliasesFound"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><see cref="ResolutionStrategy"/> is not a value this component handles.</exception>
    public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        var newRows = new List<object[]>();

        _aliasDictionary ??= GenerateAliasTable(TimeoutForAssemblingAliasTable);

        var idx = toProcess.Columns.IndexOf(AliasColumnInInputDataTables);

        if (idx == -1)
        {
            var columnNames = string.Join(",", toProcess.Columns.Cast<DataColumn>().Select(c => c.ColumnName));
            throw new KeyNotFoundException(
                $"You asked to resolve aliases on a column called '{AliasColumnInInputDataTables}' but no column by that name appeared in the DataTable being processed.  Columns in that table were:{columnNames}");
        }

        var elements = toProcess.Columns.Count;

        var matchesFound = 0;

        foreach (DataRow r in toProcess.Rows)
        {
            // Use the ordinal resolved above rather than looking the column up by name for every row
            var value = r[idx];

            if (!_aliasDictionary.TryGetValue(value, out var aliases))
                continue;

            matchesFound++;
            switch (ResolutionStrategy)
            {
                case AliasResolutionStrategy.CrashIfAliasesFound:
                    throw new AliasException(
                        $"Found Alias in input data and ResolutionStrategy is {ResolutionStrategy}, aliased value was {value}");

                case AliasResolutionStrategy.MultiplyInputDataRowsByAliases:

                    //Get all aliases for the input value
                    foreach (var alias in aliases)
                    {
                        //Create a copy of the input row
                        var newRow = new object[elements];
                        r.ItemArray.CopyTo(newRow, 0);

                        //Set the aliasable element to the alias
                        newRow[idx] = alias;

                        //Add it to our new rows collection
                        newRows.Add(newRow);
                    }

                    break;
                default:
                    throw new ArgumentOutOfRangeException(nameof(ResolutionStrategy), ResolutionStrategy,
                        "Unsupported AliasResolutionStrategy");
            }
        }

        if (newRows.Count > 0)
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                $"Found {matchesFound} aliased input values, resolved by adding {newRows.Count} additional duplicate rows to the dataset"));
        else
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
                $"No Aliases found for identifiers in column {AliasColumnInInputDataTables}"));

        // Rows are appended only after the loop so the Rows collection is never modified while being enumerated
        foreach (var newRow in newRows)
            toProcess.Rows.Add(newRow);

        return toProcess;
    }

    /// <summary>
    /// Releases the cached alias table.
    /// </summary>
    /// <param name="listener">Not used.</param>
    /// <param name="pipelineFailureExceptionIfAny">Not used.</param>
    public void Dispose(IDataLoadEventListener listener, Exception pipelineFailureExceptionIfAny)
    {
        _aliasDictionary?.Clear(); //Free up memory

        // Drop the reference too: an empty-but-non-null cache would make a later ProcessPipelineData call
        // skip the fetch and silently resolve no aliases.
        _aliasDictionary = null;
    }

    /// <summary>
    /// No-op; this component has nothing to cancel.
    /// </summary>
    /// <param name="listener">Not used.</param>
    public void Abort(IDataLoadEventListener listener)
    {
    }

    /// <summary>
    /// Attempts to build the alias table with a 5 second timeout and reports the outcome to
    /// <paramref name="notifier"/>: Success with the number of aliased values, Warning if the failure message
    /// mentions "timeout", otherwise Fail.
    /// </summary>
    /// <param name="notifier">Receives the single check result.</param>
    public void Check(ICheckNotifier notifier)
    {
        var timeout = 5;
        try
        {
            var result = GenerateAliasTable(timeout);
            notifier.OnCheckPerformed(new CheckEventArgs($"Found {result.Count} aliases", CheckResult.Success));
        }
        catch (Exception e)
        {
            // Ordinal, case-insensitive match: ToLower() is culture-sensitive and misses e.g. "TIMEOUT" under tr-TR
            var isTimeout = e.Message.Contains("timeout", StringComparison.OrdinalIgnoreCase);
            notifier.OnCheckPerformed(new CheckEventArgs($"Failed to generate alias table after {timeout}s",
                isTimeout ? CheckResult.Warning : CheckResult.Fail, e));
        }
    }

    /// <summary>
    /// Cache of the alias table keyed by input value; populated on the first call to
    /// <see cref="ProcessPipelineData"/> and released by <see cref="Dispose"/>.
    /// </summary>
    private Dictionary<object, List<object>> _aliasDictionary;

    /// <summary>
    /// Executes <see cref="AliasTableSQL"/> on <see cref="ServerToExecuteQueryOn"/> and groups the results into
    /// a lookup of input value to all of its aliases.
    /// </summary>
    /// <param name="timeoutInSeconds">Command timeout for the query.</param>
    /// <returns>Lookup from first-column value to the list of second-column values seen with it.</returns>
    /// <exception cref="AliasTableFetchException">
    /// The result set does not have <see cref="AliasColumnInInputDataTables"/> as the first of exactly 2 columns,
    /// contains a null, or contains a row whose two values are equal.
    /// </exception>
    private Dictionary<object, List<object>> GenerateAliasTable(int timeoutInSeconds)
    {
        const string expectation =
            "(expected the query to return 2 columns, the first one being the input column the second being known aliases)";

        var toReturn = new Dictionary<object, List<object>>();

        var server = DataAccessPortal.ExpectServer(ServerToExecuteQueryOn, DataAccessContext);

        using var con = server.GetConnection();
        con.Open();

        using var cmd = server.GetCommand(AliasTableSQL, con);
        cmd.CommandTimeout = timeoutInSeconds;

        using var r = cmd.ExecuteReader();

        // The column layout is validated once, when the first row is read
        var haveCheckedColumns = false;

        while (r.Read())
        {
            if (!haveCheckedColumns)
            {
                var missingColumnMessage =
                    $"Alias table did not contain a column called '{AliasColumnInInputDataTables}' {expectation}";

                int idx;

                try
                {
                    idx = r.GetOrdinal(AliasColumnInInputDataTables);
                }
                catch (IndexOutOfRangeException)
                {
                    throw new AliasTableFetchException(missingColumnMessage);
                }

                // Kept alongside the catch above in case a reader reports a missing column as -1 instead of throwing
                if (idx == -1)
                    throw new AliasTableFetchException(missingColumnMessage);

                if (idx != 0)
                    throw new AliasTableFetchException(
                        $"Alias table DID contain column '{AliasColumnInInputDataTables}' but it was not the first column in the result set {expectation}");

                if (r.FieldCount != 2)
                    throw new AliasTableFetchException(
                        $"Alias table SQL resulted in {r.FieldCount} fields being returned, we expect exactly 2 {expectation}");

                haveCheckedColumns = true;
            }

            var input = r[0];
            var alias = r[1];

            if (input == null || input == DBNull.Value || alias == null || alias == DBNull.Value)
                throw new AliasTableFetchException("Alias table contained nulls");

            if (input.Equals(alias))
                throw new AliasTableFetchException(
                    "Alias table SQL should only return aliases not exact matches e.g. in the case of a simple alias X is Y, do not return 4 rows {X=X AND Y=Y AND Y=X AND X=Y}, only return 2 rows {X=Y and Y=X}");

            // Single lookup per row instead of ContainsKey + Add + indexer
            if (!toReturn.TryGetValue(input, out var knownAliases))
            {
                knownAliases = new List<object>();
                toReturn.Add(input, knownAliases);
            }

            knownAliases.Add(alias);
        }

        return toReturn;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc