• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 12136273647

03 Dec 2024 09:03AM UTC coverage: 57.448% (+0.07%) from 57.382%
12136273647

push

github

JFriel
Merge branch 'develop' of https://github.com/HicServices/RDMP

11297 of 21215 branches covered (53.25%)

Branch coverage included in aggregate %.

439 of 673 new or added lines in 30 files covered. (65.23%)

5 existing lines in 4 files now uncovered.

32146 of 54407 relevant lines covered (59.08%)

17095.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.69
/Rdmp.Core/DataLoad/Modules/Mutilators/RegexRedactionMutilator.cs
1
// Copyright (c) The University of Dundee 2024-2024
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using FAnsi.Discovery;
8
using Rdmp.Core.Curation.Data;
9
using Rdmp.Core.Curation.Data.DataLoad;
10
using Rdmp.Core.Curation.DataHelper.RegexRedaction;
11
using Rdmp.Core.DataLoad.Engine.Job;
12
using Rdmp.Core.ReusableLibraryCode.Progress;
13
using System;
14
using System.Data;
15
using System.Linq;
16
using System.Text.RegularExpressions;
17
using Rdmp.Core.ReusableLibraryCode.DataAccess;
18

19
namespace Rdmp.Core.DataLoad.Modules.Mutilators;
20
/// <summary>
/// Data load mutilator that applies a <see cref="RegexRedactionConfiguration"/> to the non primary key
/// columns of a table during the AdjustRaw or AdjustStaging load stages. Candidate rows are selected with
/// a SQL LIKE pre-filter, redacted through <see cref="RegexRedactionHelper"/>, recorded via temporary
/// tables and finally written back to the table with a join update keyed on the table's primary key(s).
/// Tables without a primary key are skipped with a warning.
/// </summary>
public class RegexRedactionMutilator : MatchingTablesMutilatorWithDataLoadJob
{
    [DemandsInitialization("The regex redaction configuration to use")]
    public RegexRedactionConfiguration RedactionConfiguration { get; set; }

    [DemandsInitialization(
       "All Columns matching this pattern which have a ColumnInfo defined in the load will be affected by this mutilation",
       DefaultValue = ".*")]
    public Regex ColumnRegexPattern { get; set; }

    [DemandsInitialization(
        "Overrides ColumnRegexPattern.  If this is set then the columns chosen will be mutilated instead")]
    public ColumnInfo[] OnlyColumns { get; set; }

    // Primary key columns discovered on the table currently being mutilated; handed to the join update
    private DiscoveredColumn[] _discoveredPKColumns;

    /// <summary>
    /// Creates a mutilator that is permitted to run in the AdjustRaw and AdjustStaging load stages.
    /// </summary>
    public RegexRedactionMutilator() : base([LoadStage.AdjustRaw, LoadStage.AdjustStaging]) { }

    /// <summary>
    /// Decides whether <paramref name="column"/> has been selected for redaction.
    /// </summary>
    /// <param name="column">The column discovered on the table being mutilated.</param>
    /// <returns>
    /// When <see cref="OnlyColumns"/> contains entries: true only if one of them has the same runtime name.
    /// Otherwise: true if <see cref="ColumnRegexPattern"/> matches the runtime name (case insensitively).
    /// False when neither setting is populated.
    /// </returns>
    private bool ColumnMatches(DiscoveredColumn column)
    {
        var columnName = column.GetRuntimeName();

        // OnlyColumns is documented as overriding ColumnRegexPattern, so it has to be consulted first;
        // the pattern defaults to ".*" and would otherwise always win.
        if (OnlyColumns is { Length: > 0 })
        {
            return OnlyColumns.Any(c => c.GetRuntimeName() == columnName);
        }

        if (ColumnRegexPattern is null)
        {
            return false;
        }

        // Match case insensitively without replacing the configured Regex instance on every call
        return Regex.IsMatch(columnName, ColumnRegexPattern.ToString(), RegexOptions.IgnoreCase);
    }

    /// <summary>
    /// Redacts every selected non primary key column of <paramref name="table"/>.
    /// </summary>
    /// <param name="job">The running data load job; used for progress notifications and repository access.</param>
    /// <param name="tableInfo">The table's metadata, used to find related catalogues and their column infos.</param>
    /// <param name="table">The discovered table in the current load stage whose values are redacted.</param>
    protected override void MutilateTable(IDataLoadJob job, ITableInfo tableInfo, DiscoveredTable table)
    {
        var columns = table.DiscoverColumns();
        var relatedCatalogues = tableInfo.GetAllRelatedCatalogues();

        // Materialise once: the items are needed for the primary key lookup and for every matching column
        var catalogueItems = relatedCatalogues.SelectMany(c => c.CatalogueItems).ToArray();

        // Skip items that have no ColumnInfo instead of failing with a NullReferenceException
        var cataloguePks = catalogueItems.Where(ci => ci.ColumnInfo is { IsPrimaryKey: true }).ToList();

        // NOTE(review): primary keys are discovered by CatalogueItem name here but by ColumnInfo runtime
        // name further down - confirm the two always agree.
        var cataloguePkItemNames = cataloguePks.Select(cpk => cpk.Name).ToArray();
        _discoveredPKColumns = columns.Where(c => cataloguePkItemNames.Contains(c.GetRuntimeName())).ToArray();
        if (_discoveredPKColumns.Length == 0)
        {
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "No Primary Keys found. Redaction cannot be perfomed without a primary key."));
            //Don't want to fail the data load, but just let the user know
            return;
        }

        // Resolve the primary key names once rather than once per column comparison
        var pkColumnNames = cataloguePks.Select(c => c.ColumnInfo.GetRuntimeName()).Distinct().ToArray();

        // Primary keys are never redacted, but tell the user when the configuration would have selected one
        var matchedOnPk = false;
        foreach (var column in columns.Where(c => pkColumnNames.Contains(c.GetRuntimeName())))
        {
            if (!ColumnMatches(column))
            {
                continue;
            }

            matchedOnPk = true;
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Would match on column '{column.GetRuntimeName()}' but it is a primary key"));
        }

        var nonPKColumns = columns.Where(c => !pkColumnNames.Contains(c.GetRuntimeName())).ToArray();
        if (nonPKColumns.Length == 0 && matchedOnPk)
        {
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Regex Redaction matched only Primary Key columns. They will not be redacted. Consider updating your configuration."));
        }

        foreach (var column in nonPKColumns)
        {
            if (!ColumnMatches(column))
            {
                continue;
            }

            var columnName = column.GetRuntimeName();

            // Look the ColumnInfo up before querying so that a missing ColumnInfo does not cost a table scan
            var columnInfo = catalogueItems
                .Select(ci => ci.ColumnInfo)
                .FirstOrDefault(ci => ci is not null && ci.GetRuntimeName() == columnName);
            if (columnInfo is null)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Unable to find the related column info"));
                return;
            }

            var candidateRows = FetchCandidateRows(table, columnName, pkColumnNames);

            // Fresh working tables for every column, so each column is handled exactly like the first one
            // and rows collected for an earlier column are not carried into this column's save.
            var redactionsToSaveTable = RegexRedactionHelper.GenerateRedactionsDataTable();
            var pksToSave = RegexRedactionHelper.GeneratePKDataTable();
            var redactionUpdates = candidateRows.Clone();

            foreach (DataRow row in candidateRows.Rows)
            {
                try
                {
                    RegexRedactionHelper.Redact(columnInfo, row, cataloguePks, RedactionConfiguration, redactionsToSaveTable, pksToSave, redactionUpdates);
                }
                catch (Exception e)
                {
                    // Best effort: a row that cannot be redacted is reported and the remaining rows continue
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, $"{e.Message}"));
                }
            }

            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Regex Redaction mutilator found {candidateRows.Rows.Count} redactions."));

            // Nothing to save for this column; later columns must still be processed
            if (redactionsToSaveTable.Rows.Count == 0)
            {
                continue;
            }

            // Number the primary key rows sequentially from 1
            for (var i = 0; i < pksToSave.Rows.Count; i++)
            {
                pksToSave.Rows[i]["ID"] = i + 1;
            }

            SaveRedactionsViaTemporaryTables(job, relatedCatalogues.First(), table, pksToSave, redactionsToSaveTable);

            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Performing join update"));
            RegexRedactionHelper.DoJoinUpdate(columnInfo, table, table.Database.Server, redactionUpdates, _discoveredPKColumns, Timeout * 1000);
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Regex Redactions tool found {candidateRows.Rows.Count} redactions."));
        }
    }

    /// <summary>
    /// Selects the value of <paramref name="columnName"/> plus the primary key columns for every row whose
    /// value contains the configured pattern text (binary collation LIKE pre-filter).
    /// </summary>
    /// <param name="table">The table to query.</param>
    /// <param name="columnName">Runtime name of the column being redacted; it is the first result column.</param>
    /// <param name="pkColumnNames">Runtime names of the primary key columns to return alongside the value.</param>
    /// <returns>A table holding the candidate rows.</returns>
    private DataTable FetchCandidateRows(DiscoveredTable table, string columnName, string[] pkColumnNames)
    {
        var selectList = string.Join(", ", new[] { columnName }.Concat(pkColumnNames));

        // The pattern is embedded in a SQL string literal, so single quotes must be doubled to keep the
        // statement well formed.
        var likeText = $"{RedactionConfiguration.RegexPattern}".Replace("'", "''");

        var sql = @$"
                    SELECT {selectList}
                    FROM {table.GetRuntimeName()}
                    WHERE {columnName} LIKE '%{likeText}%' COLLATE Latin1_General_BIN
                    ";

        var candidateRows = new DataTable();
        candidateRows.BeginLoadData();

        // Dispose the connection as well as the command so it is not left open after the fill
        using (var conn = table.Database.Server.GetConnection())
        {
            conn.Open();
            using var cmd = table.Database.Server.GetCommand(sql, conn);
            // NOTE(review): ADO.NET command timeouts are expressed in seconds; Timeout is declared on the
            // base class - confirm the x1000 scaling is intended.
            cmd.CommandTimeout = Timeout * 1000;
            using var da = table.Database.Server.GetDataAdapter(cmd);
            da.Fill(candidateRows);
        }

        candidateRows.EndLoadData();
        return candidateRows;
    }

    /// <summary>
    /// Uploads the collected redactions and primary key values into temporary tables, asks
    /// <see cref="RegexRedactionHelper"/> to save them and drops the temporary tables again, including
    /// when the save fails.
    /// </summary>
    /// <param name="job">The running data load job.</param>
    /// <param name="catalogue">Catalogue used to resolve the live database server handed to the save.</param>
    /// <param name="table">The table being mutilated; the temporary tables are created in its database.</param>
    /// <param name="pksToSave">Primary key values of the redacted rows.</param>
    /// <param name="redactionsToSaveTable">The redactions that were made.</param>
    private void SaveRedactionsViaTemporaryTables(IDataLoadJob job, ICatalogue catalogue, DiscoveredTable table, DataTable pksToSave, DataTable redactionsToSaveTable)
    {
        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Creating Temporary tables"));
        var pkTempTable = table.Database.CreateTable(nameof(RegexRedactionHelper.Constants.pksToSave_Temp), pksToSave);
        try
        {
            var redactionTempTable = table.Database.CreateTable(nameof(RegexRedactionHelper.Constants.redactionsToSaveTable_Temp), redactionsToSaveTable);
            try
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Saving Redactions"));
                var liveServer = catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.InternalDataProcessing, false);
                RegexRedactionHelper.SaveRedactions(job.RepositoryLocator.CatalogueRepository, pkTempTable, redactionTempTable, liveServer, Timeout * 1000);
            }
            finally
            {
                redactionTempTable.Drop();
            }
        }
        finally
        {
            pkTempTable.Drop();
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc