• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 6237307473

19 Sep 2023 04:02PM UTC coverage: 57.015% (-0.4%) from 57.44%
6237307473

push

github

web-flow
Feature/rc4 (#1570)

* Syntax tidying
* Dependency updates
* Event handling singletons (ThrowImmediately and co)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: James A Sutherland <>
Co-authored-by: James Friel <jfriel001@dundee.ac.uk>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

10734 of 20259 branches covered (0.0%)

Branch coverage included in aggregate %.

5922 of 5922 new or added lines in 565 files covered. (100.0%)

30687 of 52390 relevant lines covered (58.57%)

7361.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

53.52
/Rdmp.Core/DataLoad/Engine/Pipeline/Components/Anonymisation/BasicAnonymisationEngine.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System;
8
using System.Collections.Generic;
9
using System.Data;
10
using System.Diagnostics;
11
using System.Linq;
12
using Rdmp.Core.Curation.Data;
13
using Rdmp.Core.Curation.Data.DataLoad;
14
using Rdmp.Core.DataFlowPipeline;
15
using Rdmp.Core.DataFlowPipeline.Requirements;
16
using Rdmp.Core.ReusableLibraryCode.Checks;
17
using Rdmp.Core.ReusableLibraryCode.Progress;
18

19
namespace Rdmp.Core.DataLoad.Engine.Pipeline.Components.Anonymisation;
20

21
/// <summary>
/// Pipeline component for anonymising DataTable batches in memory according to the configuration of ANOTables / PreLoadDiscardedColumn(s) in the TableInfo.
/// Actual functionality is implemented in IdentifierDumper and ANOTransformer(s).
/// </summary>
public class BasicAnonymisationEngine : IPluginDataFlowComponent<DataTable>, IPipelineRequirement<TableInfo>
{
    /// <summary>
    /// True only once <see cref="PreInitialize"/> has run to completion.
    /// </summary>
    private bool _bInitialized;

    /// <summary>
    /// Maps the name of each input column (the ColumnInfo runtime name with <c>ANOTable.ANOPrefix</c> stripped)
    /// to the transformer that produces its ANO replacement.
    /// </summary>
    private readonly Dictionary<string, ANOTransformer> columnsToAnonymise = new();

    private IdentifierDumper _dumper;

    /// <summary>
    /// The table passed to the most recent <see cref="PreInitialize"/> call.
    /// </summary>
    public TableInfo TableToLoad { get; set; }

    /// <summary>
    /// Prepares the engine for <paramref name="target"/>: creates the <see cref="IdentifierDumper"/>, asks it to
    /// create its STAGING table and records an <see cref="ANOTransformer"/> for every ColumnInfo that has an ANOTable_ID.
    /// Safe to call more than once on the same instance; each call discards the state built by the previous one.
    /// </summary>
    /// <param name="target">The table whose ColumnInfos drive the anonymisation.</param>
    /// <param name="listener">Not used by this method.</param>
    /// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
    /// <exception cref="Exception">A ColumnInfo has an ANOTable_ID but its runtime name does not start with the ANO prefix.</exception>
    public void PreInitialize(TableInfo target, IDataLoadEventListener listener)
    {
        if (target is null)
            throw new ArgumentNullException(nameof(target));

        // Start from a clean slate so that a repeated call does not hit duplicate keys, and so that a
        // failure part-way through leaves the engine reporting "not initialized" rather than half-configured.
        _bInitialized = false;
        columnsToAnonymise.Clear();

        TableToLoad = target;

        _dumper = new IdentifierDumper(TableToLoad);
        _dumper.CreateSTAGINGTable();

        //columns we expect to ANO
        foreach (var columnInfo in target.ColumnInfos)
        {
            if (columnInfo.ANOTable_ID == null)
                continue;

            var columnName = columnInfo.GetRuntimeName();

            //The metadata says this column should be ANOd
            // Ordinal comparison: the prefix is removed below by character count, so the test must be character based too.
            if (!columnName.StartsWith(ANOTable.ANOPrefix, StringComparison.Ordinal))
                throw new Exception(
                    $"ColumnInfo  {columnName} does not start with ANO but is marked as an ANO column (ID={columnInfo.ID})");

            //if the column is ANOGp then look for column Gp in the input columns (DataTable toProcess)
            var inputColumnName = columnName[ANOTable.ANOPrefix.Length..];
            columnsToAnonymise.Add(inputColumnName, new ANOTransformer(columnInfo.ANOTable));
        }

        // Only flag success once everything above has completed.
        _bInitialized = true;
    }

    /// <summary>
    /// Running total of the rows seen across all batches, reported in progress events.
    /// </summary>
    private int recordsProcessedSoFar;

    private readonly Stopwatch stopwatch_TimeSpentTransforming = new();
    private readonly Stopwatch stopwatch_TimeSpentDumping = new();

    /// <summary>
    /// Processes one batch: verifies that every column with an ANO transform is present, passes the batch to the
    /// <see cref="IdentifierDumper"/>, then replaces each configured column with a new ANO-prefixed column
    /// populated by its <see cref="ANOTransformer"/>.
    /// </summary>
    /// <param name="toProcess">The batch to anonymise; it is modified in place.</param>
    /// <param name="listener">Receives "Dump Identifiers" and "Anonymise Identifiers" progress events.</param>
    /// <param name="cancellationToken">Not used by this method.</param>
    /// <returns>The same <paramref name="toProcess"/> instance.</returns>
    /// <exception cref="Exception"><see cref="PreInitialize"/> has not completed successfully.</exception>
    /// <exception cref="KeyNotFoundException">A column with an ANO transform is missing from <paramref name="toProcess"/>.</exception>
    public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        // Guard before touching any timer so that a premature call cannot leave a stopwatch running.
        // The exception type is kept as-is for callers that match on it.
        if (!_bInitialized)
            throw new Exception("Not Initialized yet");

        stopwatch_TimeSpentTransforming.Start();
        try
        {
            recordsProcessedSoFar += toProcess.Rows.Count;

            ThrowIfAnoInputColumnsMissing(toProcess);
            DumpIdentifiers(toProcess, listener);
            SubstituteAnoColumns(toProcess);
        }
        finally
        {
            // Always stop, so a failed batch does not keep accumulating time.
            stopwatch_TimeSpentTransforming.Stop();
        }

        if (columnsToAnonymise.Count > 0)
            listener.OnProgress(this,
                new ProgressEventArgs("Anonymise Identifiers",
                    new ProgressMeasurement(recordsProcessedSoFar, ProgressType.Records),
                    stopwatch_TimeSpentTransforming.Elapsed)); //time taken to swap ANO identifiers

        return toProcess;
    }

    /// <summary>
    /// Throws if any key of <see cref="columnsToAnonymise"/> has no exactly matching (case-sensitive) column name in <paramref name="toProcess"/>.
    /// </summary>
    private void ThrowIfAnoInputColumnsMissing(DataTable toProcess)
    {
        var foundColumnNames = toProcess.Columns.Cast<DataColumn>().Select(c => c.ColumnName).ToArray();

        // Single pass set lookup instead of rescanning every column for every key.
        var foundLookup = new HashSet<string>(foundColumnNames, StringComparer.Ordinal);
        var missingColumns = columnsToAnonymise.Keys.Where(k => !foundLookup.Contains(k)).ToArray();

        if (missingColumns.Length > 0)
            throw new KeyNotFoundException(
                $"The following columns (which have ANO Transforms on them) were missing from the DataTable:{Environment.NewLine}{string.Join(Environment.NewLine, missingColumns)}{Environment.NewLine}The columns found in the DataTable were:{Environment.NewLine}{string.Join(Environment.NewLine, foundColumnNames)}");
    }

    /// <summary>
    /// Hands the batch to the dumper, timing the call, and reports progress if the dumper says it has dumped records.
    /// </summary>
    private void DumpIdentifiers(DataTable toProcess, IDataLoadEventListener listener)
    {
        stopwatch_TimeSpentDumping.Start();
        try
        {
            //do the dumping of all the rest of the columns (those that must disappear from pipeline as opposed to those which are substituted for ANO versions)
            _dumper.DumpAllIdentifiersInTable(toProcess);
        }
        finally
        {
            stopwatch_TimeSpentDumping.Stop();
        }

        if (_dumper.HaveDumpedRecords)
            listener.OnProgress(this,
                new ProgressEventArgs("Dump Identifiers",
                    new ProgressMeasurement(recordsProcessedSoFar, ProgressType.Records),
                    stopwatch_TimeSpentDumping.Elapsed)); //time taken to dump identifiers
    }

    /// <summary>
    /// For each column with an ANOTransformer: adds the ANO-prefixed column, populates it and removes the original column.
    /// </summary>
    private void SubstituteAnoColumns(DataTable toProcess)
    {
        foreach (var (column, transformer) in columnsToAnonymise)
        {
            //add an ANO version
            var anoColumn = new DataColumn($"{ANOTable.ANOPrefix}{column}");
            toProcess.Columns.Add(anoColumn);

            //populate ANO version
            transformer.Transform(toProcess, toProcess.Columns[column], anoColumn);

            //drop the non ANO version
            toProcess.Columns.Remove(column);
        }
    }

    /// <summary>
    /// Calls DropStaging on the dumper, if one was created.
    /// </summary>
    /// <param name="listener">Not used by this method.</param>
    /// <param name="pipelineFailureExceptionIfAny">Not used by this method.</param>
    public void Dispose(IDataLoadEventListener listener, Exception pipelineFailureExceptionIfAny)
    {
        // The dumper is null when PreInitialize never ran or failed before creating it; disposal must not throw for that.
        _dumper?.DropStaging();
    }

    /// <summary>
    /// Calls DropStaging on the dumper, if one was created.
    /// </summary>
    /// <param name="listener">Not used by this method.</param>
    public void Abort(IDataLoadEventListener listener)
    {
        _dumper?.DropStaging();
    }

    public bool SilentRunning { get; set; }

    /// <summary>
    /// Performs no checks.
    /// </summary>
    /// <param name="notifier">Not used by this method.</param>
    public void Check(ICheckNotifier notifier)
    {
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc