• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

HicServices / RDMP / 6245535001

20 Sep 2023 07:44AM UTC coverage: 57.013%. First build
6245535001

push

github

web-flow
8.1.0 Release (#1628)

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

Bumps [Newtonsoft.Json](https://github.com/JamesNK/Newtonsoft.Json) from 13.0.1 to 13.0.2.
- [Release notes](https://github.com/JamesNK/Newtonsoft.Json/releases)
- [Commits](https://github.com/JamesNK/Newtonsoft.Json/compare/13.0.1...13.0.2)

---
updated-dependencies:
- dependency-name: Newtonsoft.Json
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

Bumps [NLog](https://github.com/NLog/NLog) from 5.0.5 to 5.1.0.
- [Release notes](https://github.com/NLog/NLog/releases)
- [Changelog](https://github.com/NLog/NLog/blob/dev/CHANGELOG.md)
- [Commits](https://github.com/NLog/NLog/compare/v5.0.5...v5.1.0)

---
updated-dependencies:
- dependency-name: NLog
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

* Fix -r flag - should have been --results-directory all along

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

* Bump YamlDotNet from 12.0.2 to 12.1.0

Bumps [YamlDotNet](https://github.com/aaubry/YamlDotNet) from 12.0.2 to 12.1.0.
- [Release notes](https://github.com/aaubry/YamlDotNet/releases)
- [Commits](https://github.com/aaubry/YamlDotNet/compare/v12.0.2...v12.1.0)

---
updated-dependencies:
- dependency-name: YamlDotNet
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Moq from 4.18.2 to 4.18.3

Bumps [Moq](https://github.com/moq/moq4) from 4.18.2 to 4.18.3.
- [Release notes](https://github.com/moq/moq4/releases)
- [Changelog](https://github.com/moq/moq4/blob/main/CHANGELOG.md)
- [Commits](https://github.com/moq/moq4/compare/v4.18.2...v4.18.3)

---
updated-dependencies:
- dependency-name: Moq
... (continued)

10732 of 20257 branches covered (52.98%)

Branch coverage included in aggregate %.

48141 of 48141 new or added lines in 1086 files covered. (100.0%)

30685 of 52388 relevant lines covered (58.57%)

7387.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.48
/Rdmp.Core/DataLoad/Modules/DataFlowOperations/CohortSampler.cs
1
// Copyright (c) The University of Dundee 2018-2019
2
// This file is part of the Research Data Management Platform (RDMP).
3
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
4
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
5
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
6

7
using System;
8
using System.Collections.Generic;
9
using System.Data;
10
using System.Linq;
11
using Rdmp.Core.CohortCommitting.Pipeline;
12
using Rdmp.Core.Curation.Data;
13
using Rdmp.Core.DataExport.Data;
14
using Rdmp.Core.DataFlowPipeline;
15
using Rdmp.Core.DataFlowPipeline.Requirements;
16
using Rdmp.Core.ReusableLibraryCode.Checks;
17
using Rdmp.Core.ReusableLibraryCode.Progress;
18

19
namespace Rdmp.Core.DataLoad.Modules.DataFlowOperations;
20

21
/// <summary>
/// Pipeline component that reduces a cohort being committed to a random sample of its unique private identifiers.
/// The random number generator is seeded with the Project number and the identifiers are sorted before they are
/// shuffled, so running the sampler again on the same set of identifiers produces the same selection.
/// </summary>
public class CohortSampler : IPluginDataFlowComponent<DataTable>, IPipelineRequirement<CohortCreationRequest>
{
    /// <summary>
    /// Cohort table captured in <see cref="PreInitialize"/>; used to work out the default identifier column name.
    /// </summary>
    private IExternalCohortTable _ect;

    /// <summary>
    /// Project captured in <see cref="PreInitialize"/>; its ProjectNumber is the seed for the random selection.
    /// </summary>
    private IProject _project;

    /// <summary>
    /// True until a batch has had its identifiers collected.  Any later batch is rejected.
    /// </summary>
    private bool _firstBatch = true;

    [DemandsInitialization("The number of unique patient identifiers you want returned from the input data",
        DefaultValue = 100)]
    public int SampleSize { get; set; } = 100;

    [DemandsInitialization(
        "Determines components behaviour if not enough unique identifiers are being committed.  True to crash.  False to pass on however many records there are.",
        DefaultValue = true)]
    public bool FailIfNotEnoughIdentifiers { get; set; } = true;

    [DemandsInitialization(
        "Optional.  The name of the identifier column that you are submitting.  Set this if it is different than the destination cohort private identifier field")]
    public string PrivateIdentifierColumnName { get; set; }

    /// <summary>
    /// Does nothing.
    /// </summary>
    public void Abort(IDataLoadEventListener listener)
    {
    }

    /// <summary>
    /// Does nothing; no checks are performed by this component.
    /// </summary>
    public void Check(ICheckNotifier notifier)
    {
    }

    /// <summary>
    /// Does nothing.
    /// </summary>
    public void Dispose(IDataLoadEventListener listener, Exception pipelineFailureExceptionIfAny)
    {
    }

    /// <summary>
    /// Stores the destination cohort table and the project of the cohort creation request for later use.
    /// </summary>
    /// <param name="value">The request describing the cohort being committed</param>
    /// <param name="listener">Not used</param>
    public void PreInitialize(CohortCreationRequest value, IDataLoadEventListener listener)
    {
        var definition = value.NewCohortDefinition;

        _ect = definition.LocationOfCohort;
        _project = value.Project;
    }

    /// <summary>
    /// Picks up to <see cref="SampleSize"/> unique identifiers from the private identifier column of
    /// <paramref name="toProcess"/> and returns them in a new single column table.
    /// </summary>
    /// <param name="toProcess">The whole cohort; it must contain the private identifier column</param>
    /// <param name="listener">Told which column is being searched for</param>
    /// <param name="cancellationToken">Not used</param>
    /// <returns>A new table with one column (named after the identifier column) holding the sampled values</returns>
    /// <exception cref="Exception">
    /// Thrown when called after a batch has already been sampled, when the project has no ProjectNumber, when the
    /// identifier column is missing, or when there are fewer unique identifiers than <see cref="SampleSize"/>
    /// and <see cref="FailIfNotEnoughIdentifiers"/> is true.
    /// </exception>
    public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener,
        GracefulCancellationToken cancellationToken)
    {
        if (!_firstBatch)
        {
            throw new Exception(
                "Expected to get the whole cohort at once but got multiple batches.  This component only works if the Source returns all data at once");
        }

        if (!_project.ProjectNumber.HasValue)
        {
            throw new Exception(
                "Project must have a ProjectNumber so that it can be used as a seed in random cohort sampling");
        }

        var identifierColumn = GetPrivateFieldName();

        var lookingFor = new NotifyEventArgs(ProgressEventType.Information,
            $"Looking for column called '{identifierColumn}' in the data in order to produce a sample");
        listener.OnNotify(this, lookingFor);

        if (!toProcess.Columns.Contains(identifierColumn))
        {
            throw new Exception(
                $"CohortSampler was unable to find a column called '{identifierColumn}' in the data passed in.  This is the expected private identifier column name of the cohort you are committing.");
        }

        var distinctIdentifiers = CollectDistinctIdentifiers(toProcess, identifierColumn);

        // from here on the cohort counts as consumed, any further batch is an error
        _firstBatch = false;

        var seeded = new Random(_project.ProjectNumber.Value);

        // Sort the values first (e.g. alphabetically) so that the seeded shuffle picks the same values even when
        // the input rows arrive in a different order.  See test TestCohortSampler_Repeatability_OrderIrrelevant
#pragma warning disable SCS0005 // Weak random number generator.
        var picked = distinctIdentifiers
            .OrderBy(identifier => identifier)
            .OrderBy(_ => seeded.Next())
            .Take(SampleSize)
            .ToList();
#pragma warning restore SCS0005 // Weak random number generator.

        if (picked.Count < SampleSize && FailIfNotEnoughIdentifiers)
        {
            throw new Exception(
                $"Cohort only contains {picked.Count} unique identifiers.  This is less than the requested sample size of {SampleSize} and {nameof(FailIfNotEnoughIdentifiers)} is true");
        }

        return BuildSampleTable(identifierColumn, picked);
    }

    /// <summary>
    /// Gathers the distinct values found in <paramref name="columnName"/> of <paramref name="table"/>, skipping
    /// cells that hold <see cref="DBNull.Value"/>.
    /// </summary>
    private static HashSet<object> CollectDistinctIdentifiers(DataTable table, string columnName)
    {
        var cells = table.Rows
            .Cast<DataRow>()
            .Select(row => row[columnName])
            .Where(cell => !ReferenceEquals(cell, DBNull.Value));

        return new HashSet<object>(cells);
    }

    /// <summary>
    /// Creates a table with a single column called <paramref name="columnName"/> and one row per sampled value.
    /// </summary>
    private static DataTable BuildSampleTable(string columnName, List<object> sampledValues)
    {
        var sample = new DataTable();

        sample.BeginLoadData();
        sample.Columns.Add(columnName);

        foreach (var identifier in sampledValues)
        {
            sample.Rows.Add(identifier);
        }

        sample.EndLoadData();

        return sample;
    }

    /// <summary>
    /// Returns <see cref="PrivateIdentifierColumnName"/> when it has been set, otherwise the runtime name of the
    /// private identifier field of the destination cohort table.
    /// </summary>
    private string GetPrivateFieldName() =>
        string.IsNullOrWhiteSpace(PrivateIdentifierColumnName)
            ? _ect.GetQuerySyntaxHelper().GetRuntimeName(_ect.PrivateIdentifierField)
            : PrivateIdentifierColumnName;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc