• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 12050533881

27 Nov 2024 12:42PM UTC coverage: 57.449% (+0.06%) from 57.386%
12050533881

Pull #2006

github

web-flow
Task/rdmp 32 regex redaction (#2009)

* interim

* interim

* redact and add some tests

* add missing interface

* add regex redaction keys

* add helper

* add concept

* update interfaces

* add icons and fix restore

* add missing files

* add docs

* add regions

* add ignore

* add speed test

* improve speed

* improve speed

* update padding

* stress test

* add test case

* add fk names

* faster data load

* faster update

* more timers

* super fast redaction

* tidy up code

* woking keys

* improved

* add limiting

* shared helper

* shared

* interim

* working

* add summary

* restore tests working

* add tests

* start og ui update

* start of ui

* basic configuraion

* working add

* user interface

* update for tests

* update test

* tests

* tidy up

* updates

* fix merge

* ad dicons

* add missing icons

* tidyup

* fix tests

* add missing file

* update migration number

* add changelog

* add timeout

* fix build

* interim

* tidy up

* tidy up code

* add publish

* add todo

* add redact all

* add column

* update table

* add docs

* update markdown

* fix typo

* fix typos

* tidy up tests

* tidy up tests

* Include regex redaction doc in VS Solution file

* Update ExecuteCommandPerformRegexRedactionOnCatalogueTests.cs

Syntax cleanup

* Update ThrowImmediatelyActivator.cs

Remove obsolete comment

* Update ExecuteCommandPerformRegexRedactionOnCatalogueTests.cs

Start being green and recycling

* Update ExecuteCommandPerformRegexRedactionOnCatalogueTests.cs

Finish recycling test objects

---------

Co-authored-by: James A Sutherland <j@sutherland.pw>
Pull Request #2006: Release: 8.4.0

11298 of 21213 branches covered (53.26%)

Branch coverage included in aggregate %.

439 of 675 new or added lines in 30 files covered. (65.04%)

5 existing lines in 4 files now uncovered.

32146 of 54409 relevant lines covered (59.08%)

17122.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

16.1
/Rdmp.Core/Curation/Data/Overview/OverviewModel.cs
1
// Copyright (c) The University of Dundee 2024-2024
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using NPOI.SS.Formula.Functions;
8
using Rdmp.Core.CommandExecution;
9
using Rdmp.Core.DataExport.Data;
10
using Rdmp.Core.DataLoad.Triggers;
11
using Rdmp.Core.DataViewing;
12
using Rdmp.Core.Logging;
13
using Rdmp.Core.QueryBuilding;
14
using Rdmp.Core.Repositories;
15
using Rdmp.Core.ReusableLibraryCode.DataAccess;
16
using System;
17
using System.Collections;
18
using System.Collections.Generic;
19
using System.Data;
20
using System.Linq;
21

22
namespace Rdmp.Core.Curation.Data.Overview;
23

24
/// <summary>
25
/// Used to populate information about a catalogue for use in the overview UI
26
/// </summary>
27
public class OverviewModel
28
{
29

30
    private ICatalogue _catalogue;
31
    private IBasicActivateItems _activator;
32

33
    private DataTable _dataLoads;
34

35
    private int _numberOfPeople;
36
    private int _numberOfRecords;
37

38
    public OverviewModel(IBasicActivateItems activator, ICatalogue catalogue)
2✔
39
    {
40
        _activator = activator;
2✔
41
        _catalogue = catalogue;
2✔
42
        if (catalogue != null)
2✔
43
        {
44
            Regen("");
2✔
45
        }
46
    }
2✔
47

48
    public void Regen(string whereClause)
49
    {
50
        DataTable dt = new();
2✔
51
        bool hasExtractionIdentifier = true;
2✔
52
        var column = _catalogue.CatalogueItems.Where(ci => ci.ExtractionInformation.IsExtractionIdentifier).FirstOrDefault();
2✔
53
        if (column is null)
2✔
54
        {
55
            column = _catalogue.CatalogueItems.FirstOrDefault();
2✔
56
            hasExtractionIdentifier = false;
2✔
57
        }
58
        if (column is null) return;
4!
NEW
59
        var discoveredColumn = column.ColumnInfo.Discover(DataAccessContext.InternalDataProcessing);
×
NEW
60
        var server = discoveredColumn.Table.Database.Server;
×
NEW
61
        using var con = server.GetConnection();
×
NEW
62
        con.Open();
×
NEW
63
        string populatedWhere = !string.IsNullOrWhiteSpace(whereClause) ? $"WHERE {whereClause}" : "";
×
NEW
64
        var sql = $"SELECT {column.ColumnInfo.GetRuntimeName()} FROM {discoveredColumn.Table.GetRuntimeName()} {populatedWhere}";
×
NEW
65
        using var cmd = server.GetCommand(sql, con);
×
NEW
66
        cmd.CommandTimeout = 30000;
×
NEW
67
        using var da = server.GetDataAdapter(cmd);
×
NEW
68
        dt.BeginLoadData();
×
NEW
69
        da.Fill(dt);
×
NEW
70
        dt.EndLoadData();
×
NEW
71
        con.Dispose();
×
NEW
72
        _numberOfRecords = dt.Rows.Count;
×
NEW
73
        _numberOfPeople = hasExtractionIdentifier ? dt.DefaultView.ToTable(true, column.ColumnInfo.GetRuntimeName()).Rows.Count : 0;
×
NEW
74
        GetDataLoads();
×
NEW
75
    }
×
76

77
    public int GetNumberOfRecords()
78
    {
NEW
79
        return _numberOfRecords;
×
80
    }
81

82
    public int GetNumberOfPeople()
83
    {
NEW
84
        return _numberOfPeople;
×
85
    }
86

87
    public Tuple<DateTime, DateTime> GetStartEndDates(ColumnInfo dateColumn, string whereClause)
88
    {
NEW
89
        DataTable dt = new();
×
90

NEW
91
        var discoveredColumn = _catalogue.CatalogueItems.First().ColumnInfo.Discover(DataAccessContext.InternalDataProcessing);
×
NEW
92
        var server = discoveredColumn.Table.Database.Server;
×
NEW
93
        var populatedWhereClause = !string.IsNullOrWhiteSpace(whereClause) ? $"WHERE {whereClause}" : "";
×
NEW
94
        using var con = server.GetConnection();
×
NEW
95
        con.Open();
×
NEW
96
        if (server.DatabaseType == FAnsi.DatabaseType.MicrosoftSQLServer)
×
97
        {
NEW
98
            var sql = $@"
×
NEW
99
        select min({dateColumn.GetRuntimeName()}) as min, max({dateColumn.GetRuntimeName()}) as max
×
NEW
100
        from
×
NEW
101
        (select {dateColumn.GetRuntimeName()},
×
NEW
102
        count(1) over (partition by year({dateColumn.GetRuntimeName()})) as occurs 
×
NEW
103
        from {discoveredColumn.Table.GetRuntimeName()} {populatedWhereClause}) as t
×
NEW
104
        where occurs >1
×
NEW
105
        ";
×
106

NEW
107
            using var cmd = server.GetCommand(sql, con);
×
NEW
108
            cmd.CommandTimeout = 30000;
×
NEW
109
            using var da = server.GetDataAdapter(cmd);
×
NEW
110
            dt.BeginLoadData();
×
NEW
111
            da.Fill(dt);
×
NEW
112
            dt.EndLoadData();
×
113
        }
114
        else
115
        {
NEW
116
            var repo = new MemoryCatalogueRepository();
×
NEW
117
            var qb = new QueryBuilder(null, null);
×
NEW
118
            qb.AddColumn(new ColumnInfoToIColumn(repo, dateColumn));
×
NEW
119
            qb.AddCustomLine($"{dateColumn.Name} IS NOT NULL", FAnsi.Discovery.QuerySyntax.QueryComponent.WHERE);
×
NEW
120
            var cmd = server.GetCommand(qb.SQL, con);
×
NEW
121
            using var da = server.GetDataAdapter(cmd);
×
NEW
122
            dt.BeginLoadData();
×
NEW
123
            da.Fill(dt);
×
NEW
124
            var latest = dt.AsEnumerable()
×
NEW
125
               .Max(r => r.Field<DateTime>(dateColumn.Name));
×
NEW
126
            var earliest = dt.AsEnumerable()
×
NEW
127
              .Min(r => r.Field<DateTime>(dateColumn.Name));
×
NEW
128
            dt = new();
×
NEW
129
            dt.Rows.Add([earliest, latest]);
×
130
        }
NEW
131
        con.Dispose();
×
NEW
132
        return new Tuple<DateTime, DateTime>(DateTime.Parse(dt.Rows[0].ItemArray[0].ToString()), DateTime.Parse(dt.Rows[0].ItemArray[1].ToString()));
×
NEW
133
    }
×
134

135

136
    public static DataTable GetCountsByDatePeriod(ColumnInfo dateColumn, string datePeriod, string optionalWhere = "")
137
    {
NEW
138
        DataTable dt = new();
×
NEW
139
        if (!(new[] { "Day", "Month", "Year" }).Contains(datePeriod))
×
140
        {
NEW
141
            throw new Exception("Invalid Date period");
×
142
        }
NEW
143
        var discoveredColumn = dateColumn.Discover(DataAccessContext.InternalDataProcessing);
×
NEW
144
        var server = discoveredColumn.Table.Database.Server;
×
NEW
145
        using var con = server.GetConnection();
×
NEW
146
        con.Open();
×
NEW
147
        var dateString = "yyyy-MM";
×
148
        switch (datePeriod)
149
        {
150
            case "Day":
NEW
151
                dateString = "yyyy-MM-dd";
×
NEW
152
                break;
×
153
            case "Month":
NEW
154
                dateString = "yyyy-MM";
×
NEW
155
                break;
×
156
            case "Year":
NEW
157
                dateString = "yyyy";
×
158
                break;
159
        }
NEW
160
        if (server.DatabaseType == FAnsi.DatabaseType.MicrosoftSQLServer)
×
161
        {
NEW
162
            var sql = @$"
×
NEW
163
        SELECT format({dateColumn.GetRuntimeName()}, '{dateString}') as YearMonth, count(*) as '# Records'
×
NEW
164
        FROM {discoveredColumn.Table.GetRuntimeName()}
×
NEW
165
        WHERE {dateColumn.GetRuntimeName()} IS NOT NULL
×
NEW
166
        {(optionalWhere != "" ? "AND" : "")} {optionalWhere.Replace('"', '\'')}
×
NEW
167
        GROUP BY format({dateColumn.GetRuntimeName()}, '{dateString}')
×
NEW
168
        ORDER BY 1
×
NEW
169
        ";
×
170

NEW
171
            using var cmd = server.GetCommand(sql, con);
×
NEW
172
            cmd.CommandTimeout = 30000;
×
NEW
173
            using var da = server.GetDataAdapter(cmd);
×
NEW
174
            dt.BeginLoadData();
×
NEW
175
            da.Fill(dt);
×
NEW
176
            dt.EndLoadData();
×
177
        }
178
        else
179
        {
NEW
180
            var repo = new MemoryCatalogueRepository();
×
NEW
181
            var qb = new QueryBuilder(null, null);
×
NEW
182
            qb.AddColumn(new ColumnInfoToIColumn(repo, dateColumn));
×
NEW
183
            qb.AddCustomLine($"{dateColumn.Name} IS NOT NULL", FAnsi.Discovery.QuerySyntax.QueryComponent.WHERE);
×
NEW
184
            var cmd = server.GetCommand(qb.SQL, con);
×
NEW
185
            using var da = server.GetDataAdapter(cmd);
×
NEW
186
            dt.BeginLoadData();
×
NEW
187
            da.Fill(dt);
×
NEW
188
            Dictionary<string, int> counts = [];
×
NEW
189
            foreach (var row in dt.AsEnumerable())
×
190
            {
NEW
191
                var datetime = DateTime.Parse(row.ItemArray[0].ToString());
×
NEW
192
                var key = datetime.ToString(dateString);
×
NEW
193
                counts[key]++;
×
194
            }
NEW
195
            dt = new DataTable();
×
NEW
196
            foreach (var item in counts)
×
197
            {
NEW
198
                DataRow dr = dt.NewRow();
×
NEW
199
                dr["YearMonth"] = item.Key;
×
NEW
200
                dr["# Records"] = item.Value;
×
NEW
201
                dt.Rows.Add(dr);
×
202
            }
NEW
203
            dt.EndLoadData();
×
204

205
        }
NEW
206
        con.Dispose();
×
NEW
207
        return dt;
×
NEW
208
    }
×
209

210
    private void GetDataLoads()
211
    {
212
        _dataLoads = new();
2✔
213
        var repo = new MemoryCatalogueRepository();
2✔
214
        var qb = new QueryBuilder(null, null);
2✔
215
        var columnInfo = _catalogue.CatalogueItems.Where(c => c.Name == SpecialFieldNames.DataLoadRunID).Select(c => c.ColumnInfo).FirstOrDefault();
2✔
216
        if (columnInfo != null)
2!
217
        {
NEW
218
            qb.AddColumn(new ColumnInfoToIColumn(repo, columnInfo));
×
NEW
219
            qb.AddCustomLine($"{columnInfo.Name} IS NOT NULL", FAnsi.Discovery.QuerySyntax.QueryComponent.WHERE);
×
NEW
220
            var sql = qb.SQL;
×
NEW
221
            var server = columnInfo.Discover(DataAccessContext.InternalDataProcessing).Table.Database.Server;
×
NEW
222
            using var con = server.GetConnection();
×
NEW
223
            con.Open();
×
224

NEW
225
            using var cmd = server.GetCommand(sql, con);
×
NEW
226
            cmd.CommandTimeout = 30000;
×
NEW
227
            using var da = server.GetDataAdapter(cmd);
×
NEW
228
            _dataLoads.BeginLoadData();
×
NEW
229
            da.Fill(_dataLoads);
×
NEW
230
            _dataLoads.EndLoadData();
×
231
        }
232

233
    }
2✔
234

235
    public DataTable GetMostRecentDataLoad()
236
    {
237
        if (_dataLoads == null) GetDataLoads();
6✔
238
        if (_dataLoads.Rows.Count == 0) return null;
8!
NEW
239
        var maxDataLoadId = _dataLoads.AsEnumerable().Select(r => int.Parse(r[0].ToString())).Distinct().Max();
×
NEW
240
        var loggingServers = _activator.RepositoryLocator.CatalogueRepository.GetAllObjectsWhere<ExternalDatabaseServer>("CreatedByAssembly", "Rdmp.Core/Databases.LoggingDatabase");
×
NEW
241
        var columnInfo = _catalogue.CatalogueItems.Where(c => c.Name == SpecialFieldNames.DataLoadRunID).Select(c => c.ColumnInfo).First();
×
NEW
242
        var server = columnInfo.Discover(DataAccessContext.InternalDataProcessing).Table.Database.Server;
×
243

NEW
244
        DataTable dt = new();
×
NEW
245
        foreach (var loggingServer in loggingServers)
×
246
        {
NEW
247
            var logCollection = new ViewLogsCollection(loggingServer, new LogViewerFilter(LoggingTables.DataLoadRun));
×
NEW
248
            var dataLoadRunSql = $"{logCollection.GetSql()} WHERE ID={maxDataLoadId}";
×
NEW
249
            var logServer = loggingServer.Discover(DataAccessContext.InternalDataProcessing).Server;
×
NEW
250
            using var loggingCon = logServer.GetConnection();
×
NEW
251
            loggingCon.Open();
×
NEW
252
            using var loggingCmd = logServer.GetCommand(dataLoadRunSql, loggingCon);
×
NEW
253
            loggingCmd.CommandTimeout = 30000;
×
NEW
254
            using var loggingDa = server.GetDataAdapter(loggingCmd);
×
NEW
255
            dt.BeginLoadData();
×
NEW
256
            loggingDa.Fill(dt);
×
NEW
257
            dt.EndLoadData();
×
NEW
258
            loggingCon.Dispose();
×
NEW
259
            if (dt.Rows.Count > 0)
×
260
            {
NEW
261
                break;
×
262
            }
263
        }
NEW
264
        return dt;
×
265
    }
266

267
    public List<CumulativeExtractionResults> GetExtractions()
268
    {
269
        var datasets = _activator.RepositoryLocator.DataExportRepository.GetAllObjectsWhere<ExtractableDataSet>("Catalogue_ID", _catalogue.ID).Select(d => d.ID);
4✔
270
        var results = _activator.RepositoryLocator.DataExportRepository.GetAllObjects<CumulativeExtractionResults>().Where(result => datasets.Contains(result.ExtractableDataSet_ID)).ToList();
16✔
271
        return results;
4✔
272

273
    }
274

275
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc