HicServices / RDMP / build 6245535001
20 Sep 2023 07:44 UTC. Coverage: 57.013%. First build.
Triggered by: push (github, web-flow)
Commit: 8.1.0 Release (#1628)

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

Bumps [Newtonsoft.Json](https://github.com/JamesNK/Newtonsoft.Json) from 13.0.1 to 13.0.2.
- [Release notes](https://github.com/JamesNK/Newtonsoft.Json/releases)
- [Commits](https://github.com/JamesNK/Newtonsoft.Json/compare/13.0.1...13.0.2)

---
updated-dependencies:
- dependency-name: Newtonsoft.Json
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

Bumps [NLog](https://github.com/NLog/NLog) from 5.0.5 to 5.1.0.
- [Release notes](https://github.com/NLog/NLog/releases)
- [Changelog](https://github.com/NLog/NLog/blob/dev/CHANGELOG.md)
- [Commits](https://github.com/NLog/NLog/compare/v5.0.5...v5.1.0)

---
updated-dependencies:
- dependency-name: NLog
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NLog from 5.0.5 to 5.1.0

* Fix -r flag - should have been --results-directory all along

* Bump Newtonsoft.Json from 13.0.1 to 13.0.2

* Bump YamlDotNet from 12.0.2 to 12.1.0

Bumps [YamlDotNet](https://github.com/aaubry/YamlDotNet) from 12.0.2 to 12.1.0.
- [Release notes](https://github.com/aaubry/YamlDotNet/releases)
- [Commits](https://github.com/aaubry/YamlDotNet/compare/v12.0.2...v12.1.0)

---
updated-dependencies:
- dependency-name: YamlDotNet
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Moq from 4.18.2 to 4.18.3

Bumps [Moq](https://github.com/moq/moq4) from 4.18.2 to 4.18.3.
- [Release notes](https://github.com/moq/moq4/releases)
- [Changelog](https://github.com/moq/moq4/blob/main/CHANGELOG.md)
- [Commits](https://github.com/moq/moq4/compare/v4.18.2...v4.18.3)

---
updated-dependencies:
- dependency-name: Moq
... (continued)

10732 of 20257 branches covered (52.98%)

Branch coverage included in aggregate %.

48141 of 48141 new or added lines in 1086 files covered (100.0%)

30685 of 52388 relevant lines covered (58.57%)

7387.88 hits per line

Source file: /Rdmp.Core/DataLoad/Modules/DataFlowSources/DelimitedFlatFileDataFlowSource.cs (92.14% covered)
// Copyright (c) The University of Dundee 2018-2019
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.

using System;
using System.Data;
using System.Globalization;
using System.IO;
using System.Linq;
using CsvHelper;
using CsvHelper.Configuration;
using FAnsi.Discovery;
using Rdmp.Core.Curation.Data;
using Rdmp.Core.DataFlowPipeline;
using Rdmp.Core.DataFlowPipeline.Requirements;
using Rdmp.Core.DataLoad.Engine.Attachers;
using Rdmp.Core.DataLoad.Modules.DataFlowSources.SubComponents;
using Rdmp.Core.ReusableLibraryCode.Checks;
using Rdmp.Core.ReusableLibraryCode.Progress;

namespace Rdmp.Core.DataLoad.Modules.DataFlowSources;

/// <summary>
/// Pipeline component (source) for reading from a flat file delimited by a specific character (or string) e.g. csv.  The file is batch processed into
/// DataTables of size MaxBatchSize (to avoid memory problems in large files).
///
/// <para>Values read are fed into the pipeline as a DataTable with the Name of the DataTable being the name of the file being read.  Example usage would
/// be setting the separator to , to read CSV files.</para>
/// </summary>
public class DelimitedFlatFileDataFlowSource : IPluginDataFlowSource<DataTable>, IPipelineRequirement<FlatFileToLoad>
{
    private CsvReader _reader;

    private bool _dataAvailable;
    private IDataLoadEventListener _listener;

    /// <summary>
    /// The minimum value to allow the user to specify for <see cref="StronglyTypeInputBatchSize"/>
    /// </summary>
    public const int MinimumStronglyTypeInputBatchSize = 500;

    #region User viewable descriptions of what properties do (used to help wrapper classes have consistent definitions)

    public const string ForceHeaders_DemandDescription =
        "Forces specific headers to be interpreted for columns, this is a string that will effectively be appended to the front of the file when it is read.  WARNING: Use this argument only when the file does not have any headers (Note that you must use the appropriate separator for your file)";

    public const string ForceHeadersReplacesFirstLineInFile_Description =
        "Only used when ForceHeaders is specified, if true then the line will replace the first line of the file.  If left as false (default) then the line will be appended to the file.  Use true if you want to replace existing headers in the file and false if the file doesn't have any headers in it at all.";

    public const string IgnoreQuotes_DemandDescription =
        "True if the parser should treat double quotes as normal characters";

    public const string IgnoreBlankLines_DemandDescription = "True if the parser should skip over blank lines";

    public const string MakeHeaderNamesSane_DemandDescription =
        "True (recommended) if you want to fix columns that have crazy names e.g. 'my column #1' would become 'mycolumn1'";

    public const string BadDataHandlingStrategy_DemandDescription =
        @"Determines system behaviour when unprocessable rows are found in the file being loaded:
ThrowException - Stop the loading process with an error
IgnoreRows - Step over the line in the file and carry on loading
DivertRows - As IgnoreRows but write all unloadable lines to X_Errors.txt where X is the file name being loaded";

    public const string IgnoreBadReads_DemandDescription =
        @"True - Ignore read warnings from CSVHelper (e.g. when a quote appears in the middle of a cell)
False - Treat read warnings from CSVHelper according to the BadDataHandlingStrategy";

    public const string ThrowOnEmptyFiles_DemandDescription =
        @"Determines system behaviour when a file is empty or has only a header row";

    public const string AttemptToResolveNewLinesInRecords_DemandDescription =
        @"Determines system behaviour when a line has too few cells compared to the header count.
True - Attempt to read more lines to make a complete record
False - Treat the line as bad data (See BadDataHandlingStrategy)";

    public const string MaximumErrorsToReport_DemandDescription =
        "The maximum number of errors to report before suppressing logging.  This is important if you have a large file e.g. 80 million rows and you have a bug/configuration problem that results in lots of bad rows.  Specify 0 for no limit.  Negatives also result in no limit";

    public const string IgnoreColumns_Description =
        @"By default all columns from the source (file) will be read.  Set this to a list of headers (separated with the correct separator for your file) to ignore the specified columns.

This will not help you avoid bad data as the full file structure must still be read regardless.";

    #endregion

    [DemandsInitialization("The separator that delimits the file", Mandatory = true)]
    public string Separator
    {
        get => _separator;
        set => _separator =
            value == "\\t"
                ? "\t"
                : value; //automatically switch \\t into \t (the user inputs \t; it must become a real whitespace tab when executing)
    }

    [DemandsInitialization(ForceHeaders_DemandDescription)]
    public string ForceHeaders { get; set; }

    [DemandsInitialization(ForceHeadersReplacesFirstLineInFile_Description)]
    public bool ForceHeadersReplacesFirstLineInFile { get; set; }

    [DemandsInitialization(IgnoreQuotes_DemandDescription)]
    public bool IgnoreQuotes { get; set; }

    [DemandsInitialization(IgnoreBlankLines_DemandDescription)]
    public bool IgnoreBlankLines { get; set; }

    [DemandsInitialization(MakeHeaderNamesSane_DemandDescription, DemandType.Unspecified, true)]
    public bool MakeHeaderNamesSane { get; set; }

    [DemandsInitialization(
        "True (recommended) if you want to impute the datatypes from the data being loaded, False if you want to load everything as strings",
        DemandType.Unspecified, true)]
    public bool StronglyTypeInput { get; set; }

    [DemandsInitialization(
        "BatchSize to use when predicting datatypes i.e. if you set this to 1000 and the first 1000 rows have an int field but the 5000th row has a string, you will get an error.  Set to 0 to use MaxBatchSize.  Set to -1 to load the entire file before computing datatypes (can result in out of memory for super large files)")]
    public int StronglyTypeInputBatchSize { get; set; }

    [DemandsInitialization(
        "Number of rows to read at once from the input file in each go (after the first - See StronglyTypeInputBatchSize)",
        DefaultValue = 100000)]
    public int MaxBatchSize { get; set; }

    [DemandsInitialization(
        "A collection of column names that are expected to be found in the input file which you want to specify as explicit types (e.g. you load barcodes like 0110 and 1111 and want these all loaded as char(4) instead of int)")]
    public ExplicitTypingCollection ExplicitlyTypedColumns { get; set; }

    [DemandsInitialization(BadDataHandlingStrategy_DemandDescription,
        DefaultValue = BadDataHandlingStrategy.ThrowException)]
    public BadDataHandlingStrategy BadDataHandlingStrategy { get; set; }

    [DemandsInitialization(IgnoreBadReads_DemandDescription, DefaultValue = true)]
    public bool IgnoreBadReads { get; set; }

    [DemandsInitialization(ThrowOnEmptyFiles_DemandDescription, DefaultValue = true)]
    public bool ThrowOnEmptyFiles { get; set; }

    [DemandsInitialization(AttemptToResolveNewLinesInRecords_DemandDescription, DefaultValue = false)]
    public bool AttemptToResolveNewLinesInRecords { get; set; }

    [DemandsInitialization(MaximumErrorsToReport_DemandDescription, DefaultValue = 100)]
    public int MaximumErrorsToReport { get; set; }

    [DemandsInitialization(IgnoreColumns_Description)]
    public string IgnoreColumns { get; set; }

    private CultureInfo _culture;

    [DemandsInitialization("The culture to use for dates")]
    public CultureInfo Culture
    {
        get => _culture ?? CultureInfo.CurrentCulture;
        set => _culture = value;
    }

    [DemandsInitialization(Attacher.ExplicitDateTimeFormat_DemandDescription)]
    public string ExplicitDateTimeFormat { get; set; }

    /// <summary>
    /// The database table we are trying to load
    /// </summary>
    private DataTable _workingTable;

    /// <summary>
    /// File we are trying to load
    /// </summary>
    private FlatFileToLoad _fileToLoad;

    public FlatFileColumnCollection Headers { get; private set; }
    public FlatFileEventHandlers EventHandlers { get; private set; }
    public FlatFileToDataTablePusher DataPusher { get; private set; }

    /// <summary>
    /// things we know we definitely cannot load!
    /// </summary>
    private string[] _prohibitedExtensions =
    {
        ".xls", ".xlsx", ".doc", ".docx"
    };

    private string _separator;

    /// <summary>
    /// Used to split the records read into chunks to avoid running out of memory
    /// </summary>
    private int _lineNumberBatch;


    private void InitializeComponents()
    {
        Headers = new FlatFileColumnCollection(_fileToLoad, MakeHeaderNamesSane, ExplicitlyTypedColumns, ForceHeaders,
            ForceHeadersReplacesFirstLineInFile, IgnoreColumns);
        DataPusher = new FlatFileToDataTablePusher(_fileToLoad, Headers, HackValueReadFromFile,
            AttemptToResolveNewLinesInRecords, Culture, ExplicitDateTimeFormat);
        EventHandlers = new FlatFileEventHandlers(_fileToLoad, DataPusher, ThrowOnEmptyFiles, BadDataHandlingStrategy,
            _listener, MaximumErrorsToReport <= 0 ? int.MaxValue : MaximumErrorsToReport, IgnoreBadReads);
    }


    public DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
    {
        try
        {
            _listener = listener;

            var rowsRead = 0;

            if (_fileToLoad == null)
                throw new Exception(
                    "_fileToLoad was not set, it is supposed to be set because of IPipelineRequirement<FlatFileToLoad> - maybe this PreInitialize method was not called?");

            if (Headers == null)
            {
                InitializeComponents();

                //open the file
                OpenFile(_fileToLoad.File);

                if (Headers.FileIsEmpty)
                {
                    EventHandlers.FileIsEmpty();
                    return null;
                }
            }

            //if we do not yet have a data table to load
            if (_workingTable == null)
            {
                //create a table with the name of the file
                _workingTable = Headers.GetDataTableWithHeaders(_listener);
                _workingTable.TableName =
                    QuerySyntaxHelper.MakeHeaderNameSensible(Path.GetFileNameWithoutExtension(_fileToLoad.File.Name));

                //set the data table to the new untyped but correctly headered table
                SetDataTable(_workingTable);

                //Now we must read some data
                if (StronglyTypeInput && StronglyTypeInputBatchSize != 0)
                {
                    var batchSizeToLoad = StronglyTypeInputBatchSize == -1
                        ? int.MaxValue
                        : StronglyTypeInputBatchSize;

                    if (batchSizeToLoad < MinimumStronglyTypeInputBatchSize)
                        listener.OnNotify(this,
                            new NotifyEventArgs(ProgressEventType.Warning,
                                $"You set StronglyTypeInputBatchSize to {batchSizeToLoad}; this may be too small!",
                                null));

                    //user wants to strongly type input with a custom batch size
                    rowsRead = IterativelyBatchLoadDataIntoDataTable(_workingTable, batchSizeToLoad);
                }
                else
                //user does not want to strongly type or is strongly typing with regular batch size
                {
                    rowsRead = IterativelyBatchLoadDataIntoDataTable(_workingTable, MaxBatchSize);
                }

                if (StronglyTypeInput)
                    _workingTable = DataPusher.StronglyTypeTable(_workingTable, ExplicitlyTypedColumns);

                if (rowsRead == 0)
                    EventHandlers.FileIsEmpty();
            }
            else
            {
                //this isn't the first pass, so we have everything set up and can just read more data

                //data table has been set so has a good schema or no schema depending on what user wanted, at least it has all the headers etc set up correctly
                //so just clear the rows we loaded last chunk and load more
                _workingTable.Rows.Clear();

                //get more rows
                rowsRead = IterativelyBatchLoadDataIntoDataTable(_workingTable, MaxBatchSize);
            }

            //however we read

            //if rows were not read
            if (rowsRead == 0)
                return null; //we are done

            //rows were read so return a copy of the DataTable; because we will continually reload the same DataTable schema throughout the file we don't want to give up our reference to good headers in case someone mutates it
            var copy = _workingTable.Copy();

            foreach (var unamed in Headers.UnamedColumns)
                copy.Columns.Remove(unamed.ColumnName);

            return copy;
        }
        catch (Exception)
        {
            //make sure file is closed if it crashes
            _reader?.Dispose();
            throw;
        }
    }


    public void Dispose(IDataLoadEventListener listener, Exception pipelineFailureExceptionIfAny)
    {
        CloseReader();
    }

    public void Abort(IDataLoadEventListener listener)
    {
        CloseReader();
    }

    private void CloseReader()
    {
        if (_reader != null)
        {
            _reader.Dispose();
            _reader = null;
        }
    }

    public DataTable TryGetPreview()
    {
        //there is already a data table in memory
        if (_workingTable != null)
            return _workingTable;

        //we have not loaded anything yet
        if (Headers == null)
        {
            //get a chunk
            var toReturn = GetChunk(ThrowImmediatelyDataLoadEventListener.Quiet, new GracefulCancellationToken());

            //clear these to close the file and reset state to 'I need to open the file again' state
            CloseReader();

            Headers = null;
            EventHandlers = null;
            DataPusher = null;

            _workingTable = null;
            _reader = null;

            return toReturn;
        }

        throw new NotSupportedException(
            "Cannot generate preview because _headers has already been set which likely means it is already loading / didn't clean up properly after last preview attempt?");
    }

    public void Check(ICheckNotifier notifier)
    {
        notifier.OnCheckPerformed(Separator == null
            ? new CheckEventArgs($"Separator argument has not been set on {GetType().Name}", CheckResult.Fail)
            : new CheckEventArgs($"Separator on {GetType().Name} is {Separator}", CheckResult.Success));

        if (!StronglyTypeInput)
            notifier.OnCheckPerformed(
                new CheckEventArgs("StronglyTypeInput is false, this feature is highly recommended",
                    CheckResult.Warning));

        if (StronglyTypeInput && StronglyTypeInputBatchSize < 500)
            notifier.OnCheckPerformed(
                new CheckEventArgs(
                    "StronglyTypeInputBatchSize is less than the recommended 500: this may cause errors when determining the best data type from the source file.",
                    CheckResult.Warning));

        if (_fileToLoad.File == null)
        {
            notifier.OnCheckPerformed(
                new CheckEventArgs(
                    "Input object FlatFileToLoad had a null .File property.  This means the FlatFileToLoad is not known.  This is only valid at DesignTime and only if the source file is unknown.  It means we can't check our compatibility with the file",
                    CheckResult.Warning));
            return;
        }

        if (_fileToLoad != null)
            CheckExpectedFileExtensions(notifier, _fileToLoad.File.Extension);
    }

    private void CheckExpectedFileExtensions(ICheckNotifier notifier, string extension)
    {
        if (_prohibitedExtensions.Contains(extension))
        {
            notifier.OnCheckPerformed(
                new CheckEventArgs(
                    $"File {_fileToLoad.File.Name} has a prohibited file extension {_fileToLoad.File.Extension} (this class is designed to handle .csv, .tsv, .txt etc - basically anything that is delimited by characters and not some freaky binary/fixed width file type)",
                    CheckResult.Fail));
            return;
        }

        if (Separator == ",")
            ExpectFileExtension(notifier, ".csv", extension);

        if (Separator == "\t")
            ExpectFileExtension(notifier, ".tsv", extension);
    }

    private static void ExpectFileExtension(ICheckNotifier notifier, string expectedExtension, string actualExtension)
    {
        if (expectedExtension.Equals(actualExtension))
            notifier.OnCheckPerformed(new CheckEventArgs($"File extension matched expectations ({expectedExtension})",
                CheckResult.Success));
        else
            notifier.OnCheckPerformed(new CheckEventArgs(
                $"Unexpected file extension '{actualExtension}' (expected {expectedExtension})", CheckResult.Warning));
    }

    protected void OpenFile(FileInfo fileToLoad)
    {
        _dataAvailable = true;
        _lineNumberBatch = 0;

        //if it is blank or null (although tab is allowed)
        if (string.IsNullOrWhiteSpace(Separator) && Separator != "\t")
            throw new Exception(
                $"Could not open file {fileToLoad.FullName} because the file Separator has not been set yet, make sure to set all relevant [DemandsInitialization] properties");

        var sr = new StreamReader(fileToLoad.FullName);
        _reader = new CsvReader(sr, new CsvConfiguration(Culture)
        {
            Delimiter = Separator,
            HasHeaderRecord = string.IsNullOrWhiteSpace(ForceHeaders),
            ShouldSkipRecord = ShouldSkipRecord,
            IgnoreBlankLines = IgnoreBlankLines,
            Mode = IgnoreQuotes ? CsvMode.NoEscape : CsvMode.RFC4180,
            BadDataFound = s => EventHandlers.BadDataFound(new FlatFileLine(s), true),
            ReadingExceptionOccurred = EventHandlers.ReadingExceptionOccurred
        });

        Headers.GetHeadersFromFile(_reader);
    }


    private bool ShouldSkipRecord(ShouldSkipRecordArgs args)
    {
        if (_reader.Context.Parser.RawRow == 1 //first line of file
            && !string.IsNullOrWhiteSpace(ForceHeaders) //and we are forcing headers
            && ForceHeadersReplacesFirstLineInFile) //and those headers replace the first line of the file
        {
            Headers.ShowForceHeadersAsciiArt(args.Row, _listener);

            //skip the line
            return true;
        }

        //otherwise don't skip lines
        return false;
    }

    protected int IterativelyBatchLoadDataIntoDataTable(DataTable dt, int batchSize)
    {
        if (!_dataAvailable)
            return 0;

        if (Headers == null)
            throw new Exception("headers was null, how did that happen?");

        _lineNumberBatch = 0;

        //read from the peek first if there is anything otherwise read from the reader
        while
            (_dataAvailable =
             DataPusher.PeekedRecord != null ||
             _reader.Read()) //while we can read data -- also record whether the data was exhausted by this Read() because CSVReader blows up if you ask it to Read() after Read() has already returned false once
        {
            FlatFileLine currentRow;

            if (DataPusher.PeekedRecord != null)
            {
                currentRow = DataPusher.PeekedRecord;
                DataPusher.PeekedRecord = null;
            }
            else
            {
                currentRow = new FlatFileLine(_reader.Context);

                //if there is bad data on the current row just read the next
                if (DataPusher.BadLines.Contains(_reader.Context.Parser.RawRow))
                    continue;
            }

            _lineNumberBatch += DataPusher.PushCurrentLine(_reader, currentRow, dt, _listener, EventHandlers);

            if (!_dataAvailable)
                break;

            //if we have enough required rows for this batch, break out of reading loop
            if (_lineNumberBatch >= batchSize)
                break;
        }

        return _lineNumberBatch;
    }


    public void PreInitialize(FlatFileToLoad value, IDataLoadEventListener listener)
    {
        //we have been given a new file so we no longer know the headers
        Headers = null;

        _fileToLoad = value;
        _listener = listener;
    }

    /// <summary>
    /// Override this if you want to mess with values as they are read from the source file in some freaky way.
    /// </summary>
    /// <param name="s"></param>
    /// <returns></returns>
    protected virtual object HackValueReadFromFile(string s) => s;

    /// <summary>
    /// Sets the target DataTable that we are loading from the csv/tsv etc
    /// </summary>
    /// <param name="dt"></param>
    public void SetDataTable(DataTable dt)
    {
        if (Headers == null)
        {
            InitializeComponents();
            OpenFile(_fileToLoad.File);

            Headers.MakeDataTableFitHeaders(dt, _listener);
        }

        _workingTable = dt;
    }
}
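
// --- Illustrative example (not part of the original file) ---
// A minimal sketch of how the HackValueReadFromFile extension point invited by
// the XML doc above might be used.  The subclass name and the trimming
// behaviour are hypothetical, shown here purely to illustrate the override.
public class TrimmingFlatFileDataFlowSource : DelimitedFlatFileDataFlowSource
{
    // Trim surrounding whitespace from every raw cell value as it is read;
    // nulls pass through unchanged.
    protected override object HackValueReadFromFile(string s) => s?.Trim();
}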

public enum BadDataHandlingStrategy
{
    ThrowException,
    IgnoreRows,
    DivertRows
}
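
Usage note: the class above is a pipeline source, so it is normally hosted by the RDMP pipeline engine rather than called directly. As a rough sketch only (it assumes FlatFileToLoad wraps a FileInfo as its constructor argument, reuses the ThrowImmediatelyDataLoadEventListener.Quiet listener and GracefulCancellationToken seen in TryGetPreview above, and uses a hypothetical file path), reading a CSV file chunk by chunk might look like this:

var source = new DelimitedFlatFileDataFlowSource
{
    Separator = ",",          // use "\\t" for tab separated files; the setter converts it to a real tab
    StronglyTypeInput = true, // recommended (see Check above)
    MaxBatchSize = 100000,
    BadDataHandlingStrategy = BadDataHandlingStrategy.ThrowException
};

// Satisfies IPipelineRequirement<FlatFileToLoad> (normally done by the pipeline engine)
source.PreInitialize(new FlatFileToLoad(new FileInfo("mydata.csv")),
    ThrowImmediatelyDataLoadEventListener.Quiet);

try
{
    DataTable chunk;

    // GetChunk returns null once the file is exhausted
    while ((chunk = source.GetChunk(ThrowImmediatelyDataLoadEventListener.Quiet,
               new GracefulCancellationToken())) != null)
        Console.WriteLine($"{chunk.TableName}: {chunk.Rows.Count} rows");
}
finally
{
    // Close the underlying CsvReader whether or not the load succeeded
    source.Dispose(ThrowImmediatelyDataLoadEventListener.Quiet, null);
}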