• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 11978888584

22 Nov 2024 07:27PM UTC coverage: 57.383% (-0.002%) from 57.385%
11978888584

push

github

jas88
Fix up redundant type inheritance

11206 of 21050 branches covered (53.24%)

Branch coverage included in aggregate %.

65 of 249 new or added lines in 42 files covered. (26.1%)

17 existing lines in 14 files now uncovered.

31718 of 53752 relevant lines covered (59.01%)

8290.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.35
/Rdmp.Core/Curation/Data/DataLoad/ANOTable.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System;
8
using System.Collections.Generic;
9
using System.Data.Common;
10
using System.Linq;
11
using System.Text.RegularExpressions;
12
using FAnsi.Discovery;
13
using Rdmp.Core.Curation.Data.ImportExport;
14
using Rdmp.Core.Curation.Data.Serialization;
15
using Rdmp.Core.Databases;
16
using Rdmp.Core.MapsDirectlyToDatabaseTable;
17
using Rdmp.Core.MapsDirectlyToDatabaseTable.Attributes;
18
using Rdmp.Core.Repositories;
19
using Rdmp.Core.ReusableLibraryCode;
20
using Rdmp.Core.ReusableLibraryCode.Checks;
21
using Rdmp.Core.ReusableLibraryCode.DataAccess;
22

23
namespace Rdmp.Core.Curation.Data.DataLoad;
24

25
/// <summary>
26
/// Defines an anonymisation method for a group of related columns of the same datatype.  For example 'ANOGPCode' could be an instance/record that defines input of type
27
/// varchar(5) and anonymises into 3 digits and 2 characters with a suffix of _G.  This product would then be used by all ColumnInfos that contain GP codes (current GP
28
/// previous GP, Prescriber code etc).  Anonymisation occurs at ColumnInfo level after being loaded from a RAW data load bubble as it is pushed to the STAGING bubble.
29
/// 
30
/// <para>Each ANOTable describes a corresponding table on an ANO server (see the Server_ID property - we refer to this as an ANOStore) including details of the
31
/// transformation and a UNIQUE name/suffix.  This lets you quickly identify what data has been anonymised by which ANOTable.</para>
32
///  
33
/// <para>It is very important to curate your ANOTables properly or you could end up with irrecoverable data, for example sticking to a single ANO server, taking regular backups
34
/// and NEVER deleting ANOTables that reference existing data (in the ANOStore database).</para>
35
/// 
36
/// </summary>
37
public class ANOTable : DatabaseEntity, ICheckable, IHasDependencies
38
{
39
    /// <summary>
40
    /// Prefix to put on anonymous columns
41
    /// </summary>
42
    public const string ANOPrefix = "ANO";
43

44
    private string _identifiableDataType;
45
    private string _anonymousDataType;
46

47
    #region Database Properties
48

49
    private string _tableName;
50
    private int _numberOfIntegersToUseInAnonymousRepresentation;
51
    private int _numberOfCharactersToUseInAnonymousRepresentation;
52
    private string _suffix;
53
    private int _serverID;
54

55
    /// <summary>
56
    /// The name of the table in the ANO database that stores swapped identifiers
57
    /// </summary>
58
    public string TableName
59
    {
60
        get => _tableName;
992✔
61
        set => SetField(ref _tableName, value);
294✔
62
    }
63

64
    /// <summary>
65
    /// The number of decimal characters to use when creating ANO mapping identifiers.  This will directly impact the number of possible values that can be generated and therefore
66
    /// the number of unique input values before anonymising fails (due to collisions).
67
    /// </summary>
68
    public int NumberOfIntegersToUseInAnonymousRepresentation
69
    {
70
        get => _numberOfIntegersToUseInAnonymousRepresentation;
542✔
71
        set => SetField(ref _numberOfIntegersToUseInAnonymousRepresentation, value);
392✔
72
    }
73

74
    /// <summary>
75
    /// The number of alphabetic characters to use when creating ANO mapping identifiers.  This will directly impact the number of possible values that can be generated and therefore
76
    /// the number of unique input values before anonymising fails (due to collisions).
77
    /// </summary>
78
    public int NumberOfCharactersToUseInAnonymousRepresentation
79
    {
80
        get => _numberOfCharactersToUseInAnonymousRepresentation;
550✔
81
        set => SetField(ref _numberOfCharactersToUseInAnonymousRepresentation, value);
404✔
82
    }
83

84
    /// <summary>
85
    /// The ID of the ExternalDatabaseServer which stores the anonymous identifier substitutions (e.g. chi=>ANOchi).  This should have been created by the
86
    /// <see cref="ANOStorePatcher"/>
87
    /// </summary>
88
    [Relationship(typeof(ExternalDatabaseServer), RelationshipType.SharedObject)]
89
    public int Server_ID
90
    {
91
        get => _serverID;
688✔
92
        set => SetField(ref _serverID, value);
294✔
93
    }
94

95
    /// <summary>
96
    /// The letter that appears on the end of all anonymous identifiers generated e.g. AAB11_GP would have the suffix "GP"
97
    /// 
98
    /// <para>Once you have started using the <see cref="ANOTable"/> to anonymise identifiers you should not change the Suffix</para>
99
    /// </summary>
100
    public string Suffix
101
    {
102
        get => _suffix;
528✔
103
        set => SetField(ref _suffix, value);
298✔
104
    }
105

106
    #endregion
107

108
    #region Relationships
109

110
    /// <inheritdoc cref="Server_ID"/>
111
    [NoMappingToDatabase]
112
    public ExternalDatabaseServer Server => Repository.GetObjectByID<ExternalDatabaseServer>(Server_ID);
382✔
113

114
    #endregion
115

116
    public ANOTable()
×
117
    {
118
        // Defaults
119
        NumberOfIntegersToUseInAnonymousRepresentation = 1;
×
120
        NumberOfCharactersToUseInAnonymousRepresentation = 1;
×
121
    }
×
122

123
    /// <summary>
    /// Records a new <see cref="ANOTable"/> (anonymous mapping table definition) in <paramref name="repository"/>.  The mapping table itself
    /// does not have to exist before calling this constructor: afterwards set <see cref="NumberOfIntegersToUseInAnonymousRepresentation"/> and
    /// <see cref="NumberOfCharactersToUseInAnonymousRepresentation"/> (both start at 1) and then call <see cref="PushToANOServerAsNewTable"/>.
    /// </summary>
    /// <param name="repository">Repository the new record is inserted into; it is also searched for <see cref="ANOTable"/>s that already use <paramref name="suffix"/></param>
    /// <param name="externalDatabaseServer">Server whose ID is stored as <see cref="Server_ID"/></param>
    /// <param name="tableName">Value for <see cref="TableName"/>, must not be null or whitespace</param>
    /// <param name="suffix">Value for <see cref="Suffix"/>, must not equal the suffix of any existing <see cref="ANOTable"/></param>
    /// <exception cref="NullReferenceException"><paramref name="tableName"/> is null or whitespace</exception>
    /// <exception cref="Exception">Another <see cref="ANOTable"/> in <paramref name="repository"/> already has the same suffix</exception>
    public ANOTable(ICatalogueRepository repository, ExternalDatabaseServer externalDatabaseServer, string tableName,
        string suffix)
    {
        if (string.IsNullOrWhiteSpace(tableName))
        {
            throw new NullReferenceException("ANOTable must have a name");
        }

        // Defaults
        NumberOfIntegersToUseInAnonymousRepresentation = 1;
        NumberOfCharactersToUseInAnonymousRepresentation = 1;

        // Suffixes identify which ANOTable produced an identifier so they must be unique
        var suffixAlreadyInUse = repository
            .GetAllObjects<ANOTable>()
            .Any(existing => string.Equals(existing.Suffix, suffix));

        if (suffixAlreadyInUse)
        {
            throw new Exception($"There is already another {nameof(ANOTable)} with the suffix '{suffix}'");
        }

        var initialValues = new Dictionary<string, object>
        {
            ["TableName"] = tableName,
            ["Suffix"] = suffix,
            ["Server_ID"] = externalDatabaseServer.ID
        };

        repository.InsertAndHydrate(this, initialValues);
    }
68✔
151

152
    /// <summary>
    /// Builds an <see cref="ANOTable"/> from the current record of <paramref name="r"/>, reading the Server_ID, TableName,
    /// NumberOfIntegersToUseInAnonymousRepresentation, NumberOfCharactersToUseInAnonymousRepresentation and Suffix columns.
    /// </summary>
    /// <param name="repository">Passed through to the base constructor</param>
    /// <param name="r">Reader positioned on the record to load; also passed through to the base constructor</param>
    internal ANOTable(ICatalogueRepository repository, DbDataReader r)
        : base(repository, r)
    {
        // Columns are read in the same order as before, then assigned to the properties
        var serverId = Convert.ToInt32(r["Server_ID"]);
        var tableName = r["TableName"].ToString();
        var integerCount = Convert.ToInt32(r["NumberOfIntegersToUseInAnonymousRepresentation"].ToString());
        var characterCount = Convert.ToInt32(r["NumberOfCharactersToUseInAnonymousRepresentation"].ToString());
        var suffix = r["Suffix"].ToString();

        Server_ID = serverId;
        TableName = tableName;
        NumberOfIntegersToUseInAnonymousRepresentation = integerCount;
        NumberOfCharactersToUseInAnonymousRepresentation = characterCount;
        Suffix = suffix;
    }
182✔
164

165
    internal ANOTable(ShareManager shareManager, ShareDefinition shareDefinition)
8✔
166
    {
167
        shareManager.UpsertAndHydrate(this, shareDefinition);
8✔
168
    }
8✔
169

170
    /// <summary>
171
    /// Saves the current state to the database if the <see cref="ANOTable"/> is in a valid state according to <see cref="Check"/> otherwise throws an Exception
172
    /// </summary>
173
    public override void SaveToDatabase()
174
    {
175
        Check(ThrowImmediatelyCheckNotifier.Quiet);
70✔
176
        Repository.SaveToDatabase(this);
64✔
177
    }
64✔
178

179
    /// <summary>
180
    /// Attempts to delete the remote mapping table (only works if it is empty) when <see cref="ANOTable.IsTablePushed"/> is true, then deletes the <see cref="ANOTable"/> reference
181
    /// object (this) from the RDMP platform database.
182
    /// </summary>
183
    public override void DeleteInDatabase()
184
    {
185
        DeleteANOTableInANOStore();
38✔
186
        Repository.DeleteFromDatabase(this);
38✔
187
    }
38✔
188

189
    /// <inheritdoc/>
190
    public override string ToString() => TableName;
40✔
191

192
    /// <summary>
    /// Validates the settings of this <see cref="ANOTable"/> and looks for the remote mapping table.  Reports a failure when
    /// <see cref="Suffix"/> is blank or starts with an underscore, when either of the anonymous representation counts is negative
    /// or when both are zero.  A missing mapping table is only a warning; an error while looking for it is a failure.
    /// </summary>
    /// <param name="notifier">Receives one <see cref="CheckEventArgs"/> per problem found</param>
    public void Check(ICheckNotifier notifier)
    {
        var suffix = Suffix;
        var digitCount = NumberOfIntegersToUseInAnonymousRepresentation;
        var letterCount = NumberOfCharactersToUseInAnonymousRepresentation;

        // At most one suffix complaint is raised: blank takes priority over a leading underscore
        string suffixProblem = null;

        if (string.IsNullOrWhiteSpace(suffix))
        {
            suffixProblem =
                "You must choose a suffix for your ANO identifiers so that they can be distinguished from regular identifiers";
        }
        else if (suffix.StartsWith('_'))
        {
            suffixProblem = "Suffix will automatically include an underscore, there is no need to add it";
        }

        if (suffixProblem != null)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(suffixProblem, CheckResult.Fail));
        }

        if (digitCount < 0)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "NumberOfIntegersToUseInAnonymousRepresentation cannot be negative", CheckResult.Fail));
        }

        if (letterCount < 0)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "NumberOfCharactersToUseInAnonymousRepresentation cannot be negative", CheckResult.Fail));
        }

        if (letterCount + digitCount == 0)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "Anonymous representations must have at least 1 integer or character", CheckResult.Fail));
        }

        try
        {
            if (IsTablePushed())
            {
                return;
            }

            // Reported inside the try so that anything thrown while reporting is handled by the catch below
            notifier.OnCheckPerformed(new CheckEventArgs($"Could not find table {TableName} on server {Server}",
                CheckResult.Warning));
        }
        catch (Exception e)
        {
            notifier.OnCheckPerformed(new CheckEventArgs($"Failed to get list of tables on server {Server}",
                CheckResult.Fail, e));
        }
    }
72✔
234

235
    /// <summary>
236
    /// Returns true if the anonymous mapping table (<see cref="TableName"/> exists in the referenced mapping database (<see cref="Server"/>)
237
    /// </summary>
238
    /// <returns></returns>
239
    public bool IsTablePushed() => GetPushedTable() != null;
80✔
240

241
    /// <summary>
    /// Connects to <see cref="Server"/> and returns the <see cref="DiscoveredTable"/> whose runtime name equals <see cref="TableName"/>,
    /// i.e. the table that contains the anonymous identifier mappings.
    /// </summary>
    /// <returns>The matching table, or null if no discovered table has that name</returns>
    /// <exception cref="Exception"><see cref="Server"/> was not created by the <see cref="ANOStorePatcher"/></exception>
    /// <exception cref="InvalidOperationException">More than one discovered table has the name <see cref="TableName"/></exception>
    public DiscoveredTable GetPushedTable()
    {
        // Server is fetched from the repository on every access, so resolve it once and reuse it
        var anoServer = Server;

        if (!anoServer.WasCreatedBy(new ANOStorePatcher()))
            throw new Exception($"ANOTable's Server '{anoServer}' is not an ANOStore.  ANOTable was '{this}'");

        var expectedName = TableName;

        return DataAccessPortal
            .ExpectDatabase(anoServer, DataAccessContext.DataLoad)
            .DiscoverTables(false)
            .SingleOrDefault(t => t.GetRuntimeName().Equals(expectedName));
    }
256

257
    /// <summary>
    /// Attempts to delete the anonymous mapping table referenced by <see cref="TableName"/> on the mapping <see cref="Server"/>.  This is safer than just dropping
    /// from <see cref="GetPushedTable"/> since it first reverts this object to its database state, does nothing when the server name, database name or
    /// <see cref="TableName"/> is blank or when the table does not exist, and refuses to drop a table that contains rows.
    /// </summary>
    /// <exception cref="Exception">The mapping table exists and is not empty</exception>
    public void DeleteANOTableInANOStore()
    {
        RevertToDatabaseState();

        var anoServer = Server;

        // Without a server name, database name and table name there is nothing to look for
        var canLocateTable = !string.IsNullOrWhiteSpace(anoServer.Name) &&
                             !string.IsNullOrWhiteSpace(anoServer.Database) &&
                             !string.IsNullOrWhiteSpace(TableName);

        if (!canLocateTable)
            return;

        var pushedTable = GetPushedTable();

        // Covers both "no such table discovered" (null) and "discovered but no longer exists"
        if (pushedTable?.Exists() != true)
            return;

        if (!pushedTable.IsEmpty())
        {
            throw new Exception(
                $"Cannot delete ANOTable because it references {TableName} which is a table on server {anoServer} which contains rows, deleting this reference would leave that table as an orphan, we can only delete when there are 0 rows in the table");
        }

        pushedTable.Drop();
    }
26✔
279

280
    /// <summary>
    /// Connects to the remote ANO Server and creates a swap table of Identifier to ANOIdentifier.  The table is named <see cref="TableName"/> and has
    /// two columns: the anonymous column (also named <see cref="TableName"/>) and the identifiable column (<see cref="TableName"/> without its leading
    /// <see cref="ANOPrefix"/> characters).  Problems are reported to <paramref name="notifier"/> rather than thrown by this method itself.
    /// </summary>
    /// <param name="identifiableDatatype">The datatype of the identifiable data table</param>
    /// <param name="notifier">Receives the generated CREATE TABLE statement, warnings and failures</param>
    /// <param name="forceConnection">Optional connection to run on.  When null a new connection is opened here and always released before returning</param>
    /// <param name="forceTransaction">Optional transaction for the CREATE TABLE command.  When supplied this <see cref="ANOTable"/> is NOT saved afterwards</param>
    public void PushToANOServerAsNewTable(string identifiableDatatype, ICheckNotifier notifier,
        DbConnection forceConnection = null, DbTransaction forceTransaction = null)
    {
        var server = DataAccessPortal.ExpectServer(Server, DataAccessContext.DataLoad);

        //matches varchar(100) and has capture group 100
        var regexGetLengthOfCharType = new Regex(@".*char.*\((\d*)\)");
        var match = regexGetLengthOfCharType.Match(identifiableDatatype);

        var anonymousLength =
            NumberOfCharactersToUseInAnonymousRepresentation + NumberOfIntegersToUseInAnonymousRepresentation;

        //if user supplies varchar(100) and says he wants 3 ints and 3 chars in his anonymous identifiers he will soon run out of combinations
        //TryParse because the capture group can be empty (e.g. "char()"), in which case there is no length to compare
        if (match.Success && int.TryParse(match.Groups[1].Value, out var length) && length > anonymousLength)
            notifier.OnCheckPerformed(
                new CheckEventArgs(
                    $"You asked to create a table with a datatype of length {length}({identifiableDatatype}) but you did not allocate an equal or greater number of anonymous identifier types (NumberOfCharactersToUseInAnonymousRepresentation + NumberOfIntegersToUseInAnonymousRepresentation={anonymousLength})",
                    CheckResult.Warning));

        //use the forced connection or open a new one (which we are then responsible for releasing)
        var ownsConnection = forceConnection == null;
        var con = forceConnection ?? server.GetConnection();

        try
        {
            if (ownsConnection)
            {
                try
                {
                    con.Open();
                }
                catch (Exception e)
                {
                    notifier.OnCheckPerformed(new CheckEventArgs($"Could not connect to ano server {Server}",
                        CheckResult.Fail, e));
                    return;
                }
            }

            //if table name is ANOChi there are 2 columns Chi and ANOChi in it
            var anonymousColumnName = TableName;
            var identifiableColumnName = TableName[ANOPrefix.Length..];

            //room for the characters, the integers, the underscore and the suffix
            var anonymousDatatype = $"varchar({anonymousLength + "_".Length + Suffix.Length})";

            var sql =
                $"CREATE TABLE {TableName}{Environment.NewLine} ({Environment.NewLine}{identifiableColumnName} {identifiableDatatype} NOT NULL,{Environment.NewLine}{anonymousColumnName} {anonymousDatatype} NOT NULL";

            sql += $@",
CONSTRAINT PK_{TableName} PRIMARY KEY CLUSTERED 
(
        {identifiableColumnName} ASC
),
CONSTRAINT AK_{TableName} UNIQUE({anonymousColumnName})
)";

            using var cmd = server.GetCommand(sql, con);
            cmd.Transaction = forceTransaction;

            notifier.OnCheckPerformed(new CheckEventArgs($"Decided appropriate create statement is:{cmd.CommandText}",
                CheckResult.Success));

            try
            {
                cmd.ExecuteNonQuery();
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        $"Failed to successfully create the anonymous/identifier mapping Table in the ANO database on server {Server}",
                        CheckResult.Fail, e));
                return;
            }
        }
        finally
        {
            //if we opened this ourselves shut it, on every exit path (success, reported failure or exception)
            if (ownsConnection)
                con.Dispose();
        }

        //a transaction means nothing has hit the LIVE ANO database for real yet, so there is nothing to synchronize
        if (forceTransaction != null)
            return;

        try
        {
            //there was no transaction so save the ANOTable such that it is synchronized with reality
            notifier.OnCheckPerformed(new CheckEventArgs("Saving state because table has been pushed",
                CheckResult.Success));
            SaveToDatabase();
        }
        catch (Exception e)
        {
            notifier.OnCheckPerformed(new CheckEventArgs(
                "Failed to save state after table was successfully? pushed to ANO server", CheckResult.Fail, e));
        }
    }
40✔
383

384

385
    /// <summary>
    /// Returns the datatype a column anonymised by this <see cref="ANOTable"/> has at the given <see cref="LoadStage"/>.  Anonymisation happens during
    /// data load, so the column is identifiable in the early stages and anonymous in the later ones.
    /// 
    /// <para>The two datatypes are read from the columns of the mapping table the first time this method is called and are then reused
    /// by subsequent calls on the same object.</para>
    /// </summary>
    /// <param name="loadStage">Stage to get the datatype for</param>
    /// <returns>The identifiable datatype for GetFiles, Mounting and AdjustRaw; the anonymous datatype for AdjustStaging and PostLoad</returns>
    /// <exception cref="Exception">The mapping table is missing either the anonymous or the identifiable column</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="loadStage"/> is not one of the stages listed above</exception>
    public string GetRuntimeDataType(LoadStage loadStage)
    {
        //only interrogate the mapping table when nothing has been cached yet
        if (_identifiableDataType == null)
        {
            var anoServer = DataAccessPortal.ExpectServer(Server, DataAccessContext.DataLoad);
            var mappingColumns = anoServer.GetCurrentDatabase().ExpectTable(TableName).DiscoverColumns();

            //the identifiable column is the table name without its leading ANO
            var expectedIdentifiableName = TableName["ANO".Length..];

            var anonymousColumn = mappingColumns.SingleOrDefault(c => c.GetRuntimeName().Equals(TableName));
            var identifiableColumn =
                mappingColumns.SingleOrDefault(c => c.GetRuntimeName().Equals(expectedIdentifiableName));

            //only evaluated when building one of the error messages below
            string DescribeColumnsFound() =>
                string.Join(",", mappingColumns.Select(static c => c.GetRuntimeName()).ToArray());

            if (anonymousColumn == null)
            {
                throw new Exception(
                    $"Could not find a column called {TableName} in table {TableName} on server {Server} (Columns found were {DescribeColumnsFound()})");
            }

            if (identifiableColumn == null)
            {
                throw new Exception(
                    $"Could not find a column called {expectedIdentifiableName} in table {TableName} on server {Server} (Columns found were {DescribeColumnsFound()})");
            }

            _identifiableDataType = identifiableColumn.DataType.SQLType;
            _anonymousDataType = anonymousColumn.DataType.SQLType;
        }

        //answer from the cached values
        switch (loadStage)
        {
            case LoadStage.GetFiles:
            case LoadStage.Mounting:
            case LoadStage.AdjustRaw:
                return _identifiableDataType;

            case LoadStage.AdjustStaging:
            case LoadStage.PostLoad:
                return _anonymousDataType;

            default:
                throw new ArgumentOutOfRangeException(nameof(loadStage));
        }
    }
431

432
    /// <inheritdoc/>
433
    public IHasDependencies[] GetObjectsThisDependsOn() => Array.Empty<IHasDependencies>();
×
434

435
    /// <inheritdoc/>
436
    public IHasDependencies[] GetObjectsDependingOnThis() => Repository.GetAllObjectsWithParent<ColumnInfo>(this);
38✔
437
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc