• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 12050533881

27 Nov 2024 12:42PM UTC coverage: 57.449% (+0.06%) from 57.386%
12050533881

Pull #2006

github

web-flow
Task/rdmp 32 regex redaction (#2009)

* interim

* interim

* redact and add some tests

* add missing interface

* add regex redaction keys

* add helper

* add concept

* update interfaces

* add icons and fix restore

* add missing files

* add docs

* add regions

* add ignore

* add speed test

* improve speed

* improve speed

* update padding

* stress test

* add test case

* add fk names

* faster data load

* faster update

* more timers

* super fast redaction

* tidy up code

* working keys

* improved

* add limiting

* shared helper

* shared

* interim

* working

* add summary

* restore tests working

* add tests

* start of ui update

* start of ui

* basic configuration

* working add

* user interface

* update for tests

* update test

* tests

* tidy up

* updates

* fix merge

* add icons

* add missing icons

* tidyup

* fix tests

* add missing file

* update migration number

* add changelog

* add timeout

* fix build

* interim

* tidy up

* tidy up code

* add publish

* add todo

* add redact all

* add column

* update table

* add docs

* update markdown

* fix typo

* fix typos

* tidy up tests

* tidy up tests

* Include regex redaction doc in VS Solution file

* Update ExecuteCommandPerformRegexRedactionOnCatalogueTests.cs

Syntax cleanup

* Update ThrowImmediatelyActivator.cs

Remove obsolete comment

* Update ExecuteCommandPerformRegexRedactionOnCatalogueTests.cs

Start being green and recycling

* Update ExecuteCommandPerformRegexRedactionOnCatalogueTests.cs

Finish recycling test objects

---------

Co-authored-by: James A Sutherland <j@sutherland.pw>
Pull Request #2006: Release: 8.4.0

11298 of 21213 branches covered (53.26%)

Branch coverage included in aggregate %.

439 of 675 new or added lines in 30 files covered. (65.04%)

5 existing lines in 4 files now uncovered.

32146 of 54409 relevant lines covered (59.08%)

17122.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.9
/Rdmp.Core/Curation/DataHelper/RegexRedaction/RegexRedactionHelper.cs
1
// Copyright (c) The University of Dundee 2024-2024
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using FAnsi.Discovery.QuerySyntax;
8
using FAnsi.Discovery;
9
using Rdmp.Core.Curation.Data;
10
using Rdmp.Core.MapsDirectlyToDatabaseTable;
11
using System;
12
using System.Collections.Generic;
13
using System.Data;
14
using System.Linq;
15
using System.Text.RegularExpressions;
16
using Rdmp.Core.Repositories;
17

18
namespace Rdmp.Core.Curation.DataHelper.RegexRedaction
19
{
20
    public static class RegexRedactionHelper
21
    {
22
        /// <summary>
        /// Identifiers whose names (via <c>nameof</c>) are used as table and column names
        /// by the redaction helpers. Member order is preserved so the underlying numeric values do not change.
        /// </summary>
        public enum Constants
        {
            // NOTE(review): these two are not referenced by the helper methods in this class —
            // presumably temp table names used by callers; verify.
            pksToSave_Temp,
            redactionsToSaveTable_Temp,

            // Column names used when building the in-memory redaction and primary key tables.
            ID,
            RedactionConfiguration_ID,
            ColumnInfo_ID,
            startingIndex,
            ReplacementValue,
            RedactedValue,
            RegexRedaction_ID,
            Value,

            // Name of the table created (and dropped) by DoJoinUpdate.
            TEMP_RedactionUpdates
        }
35

36

37
        /// <summary>
        /// Builds an empty table whose column layout matches the rows appended by <c>GetRedactionValue</c>
        /// (configuration id, column id, starting index, replacement text, redacted text).
        /// </summary>
        /// <returns>A new <see cref="DataTable"/> with five untyped (string) columns and no rows.</returns>
        public static DataTable GenerateRedactionsDataTable()
        {
            // Order matters: rows are added positionally elsewhere.
            string[] columnNames =
            {
                nameof(Constants.RedactionConfiguration_ID),
                nameof(Constants.ColumnInfo_ID),
                nameof(Constants.startingIndex),
                nameof(Constants.ReplacementValue),
                nameof(Constants.RedactedValue)
            };

            var table = new DataTable();
            foreach (var columnName in columnNames)
            {
                table.Columns.Add(columnName);
            }

            return table;
        }
47

48
        /// <summary>
        /// Builds an empty table for primary key values associated with a redaction:
        /// redaction id, column id, key value, plus an integer <c>ID</c> column.
        /// </summary>
        /// <returns>A new <see cref="DataTable"/> with three untyped (string) columns followed by an <see cref="int"/> column, and no rows.</returns>
        public static DataTable GeneratePKDataTable()
        {
            var table = new DataTable();
            var columns = table.Columns;

            // The first three columns take the default column type.
            string[] untypedColumnNames =
            {
                nameof(Constants.RegexRedaction_ID),
                nameof(Constants.ColumnInfo_ID),
                nameof(Constants.Value)
            };
            foreach (var columnName in untypedColumnNames)
            {
                columns.Add(columnName);
            }

            // ID is the only explicitly typed column.
            columns.Add(nameof(Constants.ID), typeof(int));

            return table;
        }
57

58
        /// <summary>
        /// Wraps <paramref name="value"/> in single quotes, first re-formatting it as
        /// <c>yyyy-MM-dd HH:mm:ss:fff</c> when the column type is a date/time type.
        /// </summary>
        /// <param name="value">Text form of the value to quote.</param>
        /// <param name="currentColumnType">Column type name; only the exact strings <c>datetime2</c> and <c>datetime</c> trigger re-formatting.</param>
        /// <returns>The quoted value, e.g. <c>'abc'</c> or <c>'2024-11-27 12:42:05:000'</c>.</returns>
        /// <exception cref="FormatException">Thrown by <see cref="DateTime.Parse(string)"/> when a date/time column value cannot be parsed.</exception>
        public static string ConvertPotentialDateTimeObject(string value, string currentColumnType)
        {
            if (currentColumnType != "datetime2" && currentColumnType != "datetime")
            {
                return $"'{value}'";
            }

            // Parsing stays culture-sensitive, as before — presumably the text was produced
            // with the current culture; verify against callers.
            var parsed = DateTime.Parse(value);

            // NOTE(review): the ':' before the milliseconds is kept from the original format — confirm the target
            // database accepts it for datetime2 as well as datetime.
            const string format = "yyyy-MM-dd HH:mm:ss:fff";

            // Format with the invariant culture: ':' in a custom format string is the culture's time separator
            // and the year depends on the culture's calendar, so the current culture could produce a different literal.
            return $"'{parsed.ToString(format, System.Globalization.CultureInfo.InvariantCulture)}'";
        }
69

70
        /// <summary>
        /// Replaces every match of the configured regex in <paramref name="value"/> with the configured
        /// redaction string (padded with '&lt;' and '&gt;' to the length of the match) and records each
        /// redaction, together with the row's primary key values, in the supplied tables.
        /// </summary>
        /// <param name="value">The text to redact.</param>
        /// <param name="column">Column the value belongs to; its ID is stored with each redaction.</param>
        /// <param name="m">Source row; the cell at index <c>i + 1</c> is read as the value of the i-th entry in <paramref name="_cataloguePKs"/>.</param>
        /// <param name="_cataloguePKs">Primary key catalogue items, in the same order as the key cells of <paramref name="m"/>.</param>
        /// <param name="_redactionConfiguration">Supplies the regex pattern, the redaction string and the configuration ID.</param>
        /// <param name="redactionsToSaveTable">Receives one row per match: configuration ID, column ID, start index, replacement, original text.</param>
        /// <param name="pksToSave">Receives one row per match per primary key column: redaction index, key column ID, key value.</param>
        /// <param name="redactionUpates">Only its current row count is read here, to derive the redaction index.</param>
        /// <returns>The redacted text, which has the same length as <paramref name="value"/>.</returns>
        /// <exception cref="Exception">Thrown when the redaction string is longer than a match.</exception>
        public static string GetRedactionValue(string value, ColumnInfo column, DataRow m, List<CatalogueItem> _cataloguePKs, RegexRedactionConfiguration _redactionConfiguration, DataTable redactionsToSaveTable, DataTable pksToSave, DataTable redactionUpates)
        {
            Dictionary<ColumnInfo, string> pkLookup = Enumerable.Range(0, _cataloguePKs.Count).ToDictionary(i => _cataloguePKs[i].ColumnInfo, i => m[i + 1].ToString());
            var offset = 0;

            foreach (Match match in Regex.Matches(value, _redactionConfiguration.RegexPattern))
            {
                var foundMatch = match.Value;

                // Use the position reported by the regex engine instead of searching for the text again:
                // a text search could hit an earlier, non-matching occurrence. The index stays valid while
                // value is rewritten because every replacement has exactly the length of its match.
                var startingIndex = match.Index;
                string replacementValue = _redactionConfiguration.RedactionString;

                var lengthDiff = foundMatch.Length - replacementValue.Length;
                if (lengthDiff < 0)
                {
                    throw new Exception($"Redaction string '{_redactionConfiguration.RedactionString}' is longer than found match '{foundMatch}'.");
                }

                if (lengthDiff > 0)
                {
                    // Split the padding around the redaction string; any odd character goes on the right.
                    var leftPadding = lengthDiff / 2;
                    var rightPadding = lengthDiff - leftPadding;
                    replacementValue = new string('<', leftPadding) + replacementValue + new string('>', rightPadding);
                }

                value = value[..startingIndex] + replacementValue + value[(startingIndex + foundMatch.Length)..];
                redactionsToSaveTable.Rows.Add([_redactionConfiguration.ID, column.ID, startingIndex, replacementValue, foundMatch]);

                // NOTE(review): this index assumes redactionUpates holds as many rows as redactions recorded so far;
                // once an earlier value has produced more than one match the two counts diverge — confirm
                // whether the index should instead follow redactionsToSaveTable.
                foreach (var pk in pkLookup)
                {
                    pksToSave.Rows.Add([redactionUpates.Rows.Count + offset, pk.Key.ID, pk.Value]);
                }

                offset++;
            }

            return value;
        }
105

106
        /// <summary>
        /// Redacts the first cell of <paramref name="match"/> in place and imports the updated row into
        /// <paramref name="redactionUpates"/>.
        /// </summary>
        /// <param name="column">Column the first cell belongs to.</param>
        /// <param name="match">Row whose cell 0 holds the text to redact; the row is modified.</param>
        /// <param name="_cataloguePKs">Primary key catalogue items, forwarded to <c>GetRedactionValue</c>.</param>
        /// <param name="_redactionConfiguration">Redaction configuration, forwarded to <c>GetRedactionValue</c>.</param>
        /// <param name="redactionsToSaveTable">Table receiving the recorded redactions.</param>
        /// <param name="pksToSave">Table receiving the primary key values for each redaction.</param>
        /// <param name="redactionUpates">Table that the redacted row is imported into.</param>
        public static void Redact(ColumnInfo column, DataRow match, List<CatalogueItem> _cataloguePKs, RegexRedactionConfiguration _redactionConfiguration, DataTable redactionsToSaveTable, DataTable pksToSave, DataTable redactionUpates)
        {
            var originalText = match[0].ToString();

            // The row is only written to after the redaction succeeded.
            match[0] = GetRedactionValue(
                originalText,
                column,
                match,
                _cataloguePKs,
                _redactionConfiguration,
                redactionsToSaveTable,
                pksToSave,
                redactionUpates);

            redactionUpates.ImportRow(match);
        }
2,000,024✔
113

114
        /// <summary>
        /// Copies the staged redactions into <c>RegexRedaction</c> and their primary key values into
        /// <c>RegexRedactionKey</c>, using a single SQL batch run against the catalogue repository.
        /// </summary>
        /// <param name="catalogueRepo">Catalogue repository; must be a <see cref="TableRepository"/>.</param>
        /// <param name="pksToSave">Staging table of key values whose <c>RegexRedaction_ID</c> is a zero-based position, matched to the generated ids via <c>RegexRedaction_ID + 1 = inc</c>.</param>
        /// <param name="redactionsToSaveTable">Staging table of redactions to insert.</param>
        /// <param name="_server">Not used by this method.</param>
        /// <param name="timeout">Timeout passed to the repository's Insert call (units as defined by that call — TODO confirm).</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="catalogueRepo"/> is not a <see cref="TableRepository"/>.</exception>
        public static void SaveRedactions(ICatalogueRepository catalogueRepo, DiscoveredTable pksToSave, DiscoveredTable redactionsToSaveTable, DiscoveredServer _server, int timeout = 30000)
        {
            // Fail with a clear message instead of a NullReferenceException from an unchecked 'as' cast.
            if (catalogueRepo is not TableRepository tableRepository)
            {
                throw new ArgumentException($"Catalogue repository must be a {nameof(TableRepository)} to save redactions.", nameof(catalogueRepo));
            }

            // The batch: insert the redactions while capturing each generated id next to an identity counter,
            // pair the key rows with those ids by position, swap the positions for the real ids, then insert the keys.
            // NOTE(review): the pairing relies on the identity counter following the staging table's row order — confirm.
            var sql = $@"
                DECLARE @output TABLE (id1 int, inc int IDENTITY(1,1))
                INSERT INTO RegexRedaction(RedactionConfiguration_ID,ColumnInfo_ID,startingIndex,ReplacementValue,RedactedValue) OUTPUT inserted.id as id1 INTO @output
                SELECT RedactionConfiguration_ID,ColumnInfo_ID,startingIndex,ReplacementValue,RedactedValue FROM {redactionsToSaveTable.GetFullyQualifiedName()};
                                
                                DECLARE @IDMATCHER TABLE (RegexRedaction_ID int,ColumnInfo_ID int ,Value varchar(max),ID int , id1 int , inc int)
                                insert into @IDMATCHER(RegexRedaction_ID, ColumnInfo_ID,Value,ID,id1,inc)
                                select RegexRedaction_ID, ColumnInfo_ID,Value,ID,id1,inc
                                FROM {pksToSave.GetFullyQualifiedName()} as t1
                                JOIN @output as t2 ON t1.RegexRedaction_ID+1 = t2.inc
                                where t1.RegexRedaction_ID+1 = t2.inc;

                                update @IDMATCHER
                                set RegexRedaction_ID = id1
                                where RegexRedaction_ID+1 = inc;

                INSERT INTO RegexRedactionKey(RegexRedaction_ID,ColumnInfo_ID,Value)
                            select RegexRedaction_ID,ColumnInfo_ID,Value  FROM @IDMATCHER;
            ";

            tableRepository.Insert(sql, null, timeout);
        }
24✔
138

139
        /// <summary>
        /// Writes redacted values back to <paramref name="_discoveredTable"/>: uploads
        /// <paramref name="redactionUpates"/> as a table named <c>TEMP_RedactionUpdates</c>, runs an UPDATE
        /// joined on the primary key columns, then drops that table.
        /// </summary>
        /// <param name="column">Column whose value is overwritten from the uploaded table.</param>
        /// <param name="_discoveredTable">Table to update; the uploaded table is created in its database.</param>
        /// <param name="_server">Server used for the update syntax, the connection and the command.</param>
        /// <param name="redactionUpates">Rows holding the redacted value and the primary key values.</param>
        /// <param name="_discoveredPKColumns">Primary key columns used in the JOIN and WHERE clauses.</param>
        /// <param name="timeout">Value assigned to the command's <c>CommandTimeout</c>.</param>
        public static void DoJoinUpdate(ColumnInfo column, DiscoveredTable _discoveredTable, DiscoveredServer _server, DataTable redactionUpates, DiscoveredColumn[] _discoveredPKColumns, int timeout = 30000)
        {
            var redactionTable = _discoveredTable.Database.CreateTable(nameof(Constants.TEMP_RedactionUpdates), redactionUpates);
            try
            {
                var updateHelper = _server.GetQuerySyntaxHelper().UpdateHelper;
                var columnName = column.GetRuntimeName();

                var sqlLines = new List<CustomLine>
                {
                    new CustomLine($"t1.{columnName} = t2.{columnName}", QueryComponent.SET)
                };

                foreach (var pk in _discoveredPKColumns)
                {
                    // The same equality serves as both a WHERE and a JOIN condition.
                    var keyCondition = $"t1.{pk.GetRuntimeName()} = t2.{pk.GetRuntimeName()}";
                    sqlLines.Add(new CustomLine(keyCondition, QueryComponent.WHERE));
                    sqlLines.Add(new CustomLine(keyCondition, QueryComponent.JoinInfoJoin));
                }

                var sql = updateHelper.BuildUpdate(_discoveredTable, redactionTable, sqlLines);

                // using declarations release the connection and command even when the update throws;
                // both are disposed at the end of this try block, i.e. before the table is dropped.
                using var conn = _server.GetConnection();
                conn.Open();
                using var cmd = _server.GetCommand(sql, conn);
                cmd.CommandTimeout = timeout;
                cmd.ExecuteNonQuery();
            }
            finally
            {
                // Always drop the uploaded table so a failed update does not leave it behind
                // (a leftover table would presumably make the next CreateTable call fail — verify).
                redactionTable.Drop();
            }
        }
24✔
165
    }
166
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc