• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

WillemOpperman / csv-diff-dotnet / 8685582570

15 Apr 2024 08:05AM UTC coverage: 77.354% (+1.1%) from 76.233%
8685582570

push

github

web-flow
Merge pull request #57 from robbery/wip-speedup

speedup

345 of 446 relevant lines covered (77.35%)

28.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.6
csv-diff/Source.cs
1
using System;
2
using System.Collections;
3
using System.Collections.Generic;
4
using System.Collections.Specialized;
5
using System.Globalization;
6
using System.Linq;
7
using System.Text.RegularExpressions;
8
using csv_diff.Interfaces;
9
using CsvHelper;
10
using CsvHelper.Configuration;
11

12
namespace csv_diff
13
{
14
    /// <summary>
    /// Represents an input (i.e. the left/from or right/to input) to the diff process.
    /// </summary>
    /// <remarks>
    /// The constructor only reads options; it never assigns <see cref="Data"/>.
    /// <see cref="IndexSource"/> iterates <see cref="Data"/> and populates
    /// <see cref="Lines"/> and <see cref="Index"/>.
    /// </remarks>
    public class Source : ISource
    {
        // Separator placed between the individual key-field values of a composite key.
        private const string KeySeparator = "~";

        // Placeholder stored in Path when the options carry no "path" entry.
        private const string NoPath = "NA";

        public string Path { get; set; }
        public List<string[]> Data { get; set; }
        public List<string> FieldNames { get; set; }
        public List<string> KeyFields { get; set; }
        public List<string> ParentFields { get; set; }
        public List<string> ChildFields { get; set; }
        public List<int> KeyFieldIndexes { get; set; }
        public List<int> ParentFieldIndexes { get; set; }
        public List<int> ChildFieldIndexes { get; set; }
        public bool CaseSensitive { get; set; }
        public bool TrimWhitespace { get; set; }
        public bool IgnoreHeader { get; set; }
        public Dictionary<string, Regex> Include { get; set; }
        public Dictionary<string, Regex> Exclude { get; set; }
        public List<string> Warnings { get; set; }
        public int LineCount { get; set; }
        public int SkipCount { get; set; }
        public int DupCount { get; set; }
        public SortedList<string, Dictionary<string, object>> Lines { get; set; }
        public Dictionary<string, Dictionary<string, int>> Index { get; set; }

        /// <summary>
        /// Creates a source from an options dictionary.
        /// </summary>
        /// <param name="options">
        /// Optional settings. Recognised keys: <c>key_field</c>/<c>key_fields</c>,
        /// <c>parent_field</c>/<c>parent_fields</c>, <c>child_field</c>/<c>child_fields</c>,
        /// <c>field_names</c>, <c>case_sensitive</c> (defaults to true),
        /// <c>trim_whitespace</c> (defaults to false), <c>ignore_header</c> (defaults to false),
        /// <c>include</c>, <c>exclude</c> and <c>path</c>. When no usable key option is
        /// present, the first column ("0") is used as the key.
        /// </param>
        /// <exception cref="ArgumentException">
        /// A plural field option (e.g. <c>key_fields</c>) is not a collection of field names.
        /// </exception>
        public Source(Dictionary<string, object> options = null)
        {
            if (options == null)
            {
                options = new Dictionary<string, object>();
            }

            var hasKey = options.ContainsKey("key_field") || options.ContainsKey("key_fields");
            var hasParent = options.ContainsKey("parent_field") || options.ContainsKey("parent_fields");
            var hasChild = options.ContainsKey("child_field") || options.ContainsKey("child_fields");

            if (!hasParent && !hasChild && hasKey)
            {
                // A flat key: every key field is treated as a child field.
                var keyFields = ReadFieldList(options, "key_field", "key_fields");
                ParentFields = new List<string>();
                ChildFields = keyFields;
                KeyFields = keyFields;
            }
            else if (hasParent && hasChild)
            {
                ParentFields = ReadFieldList(options, "parent_field", "parent_fields");
                ChildFields = ReadFieldList(options, "child_field", "child_fields");
                KeyFields = ParentFields.Concat(ChildFields).ToList();
            }
            else
            {
                // No usable key specification: fall back to the first column.
                ParentFields = new List<string>();
                ChildFields = new List<string> { "0" };
                KeyFields = new List<string> { "0" };
            }

            if (options.ContainsKey("field_names"))
            {
                FieldNames = ToStringList(options["field_names"], "field_names");
            }

            CaseSensitive = !options.ContainsKey("case_sensitive") || (bool)options["case_sensitive"];
            TrimWhitespace = options.ContainsKey("trim_whitespace") && (bool)options["trim_whitespace"];
            IgnoreHeader = options.ContainsKey("ignore_header") && (bool)options["ignore_header"];

            if (options.ContainsKey("include"))
            {
                Include = (Dictionary<string, Regex>)options["include"];
            }

            if (options.ContainsKey("exclude"))
            {
                Exclude = (Dictionary<string, Regex>)options["exclude"];
            }

            Path = options.ContainsKey("path") ? options["path"].ToString() : NoPath;
            Warnings = new List<string>();
        }

        /// <summary>
        /// Reports whether <see cref="Path"/> holds something other than the "NA"
        /// placeholder assigned by the constructor when no <c>path</c> option is given.
        /// </summary>
        /// <returns>True when <see cref="Path"/> is not "NA".</returns>
        public bool PathExists()
        {
            return Path != NoPath;
        }

        /// <summary>
        /// Looks up an indexed line by its key.
        /// </summary>
        /// <param name="key">The key as stored in <see cref="Lines"/>.</param>
        /// <returns>
        /// The field-name-to-value map for the line, or null when the key is unknown or
        /// <see cref="IndexSource"/> has not been called yet.
        /// </returns>
        public Dictionary<string, object> this[string key]
        {
            get
            {
                // Lines is only created by IndexSource(); treat "not indexed yet" as "not found".
                if (Lines != null && Lines.TryGetValue(key, out var value))
                {
                    return value;
                }

                return null;
            }
        }

        /// <summary>
        /// Builds <see cref="Lines"/> and <see cref="Index"/> from <see cref="Data"/>.
        /// </summary>
        /// <remarks>
        /// When <see cref="FieldNames"/> is not set, the first row of <see cref="Data"/> is
        /// consumed as the header. When it is set and <see cref="IgnoreHeader"/> is true, the
        /// first row is skipped. Rows rejected by the include/exclude filters are counted in
        /// <see cref="SkipCount"/>. Rows whose key was already seen are counted in
        /// <see cref="DupCount"/>, recorded in <see cref="Warnings"/>, and stored under the
        /// key with a <c>[n]</c> suffix.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// A key field or filter field cannot be located in the field names.
        /// </exception>
        public void IndexSource()
        {
            Lines = new SortedList<string, Dictionary<string, object>>();
            Index = new Dictionary<string, Dictionary<string, int>>();
            if (FieldNames != null)
            {
                IndexFields();
            }

            var includeFilter = ConvertFilter(Include, FieldNames);
            var excludeFilter = ConvertFilter(Exclude, FieldNames);
            LineCount = 0;
            SkipCount = 0;
            DupCount = 0;
            var lineNum = 0;
            var idx = 0;

            foreach (var row in Data)
            {
                lineNum++;
                if (lineNum == 1 && FieldNames != null && IgnoreHeader)
                {
                    continue;
                }

                if (FieldNames == null)
                {
                    // The first row is the header; a null cell falls back to its column position.
                    FieldNames = row.Select((name, i) => name ?? i.ToString()).ToList();
                    IndexFields();
                    includeFilter = ConvertFilter(Include, FieldNames);
                    excludeFilter = ConvertFilter(Exclude, FieldNames);
                    continue;
                }

                var line = new Dictionary<string, object>();
                var filter = false;

                for (var i = 0; i < FieldNames.Count; i++)
                {
                    var field = FieldNames[i];

                    // Rows shorter than the header yield null for the missing cells.
                    var val = i < row.Length ? row[i] : null;
                    if (TrimWhitespace && val != null)
                    {
                        val = val.Trim();
                    }

                    line[field] = val;

                    if (includeFilter != null && includeFilter.TryGetValue(field, out var include))
                    {
                        filter = !CheckFilter(include, val);
                    }

                    // Only consult the exclude filter when the include filter has not already
                    // rejected the row; otherwise a non-matching exclude would re-admit it.
                    if (!filter && excludeFilter != null && excludeFilter.TryGetValue(field, out var exclude))
                    {
                        filter = CheckFilter(exclude, val);
                    }

                    if (filter)
                    {
                        SkipCount++;
                        break;
                    }
                }

                if (filter)
                {
                    continue;
                }

                var keyValues = KeyFieldIndexes
                    .Select(kf => NormalizeKeyValue((string)line[FieldNames[kf]]))
                    .ToList();
                var key = string.Join(KeySeparator, keyValues);
                var parentKey = string.Join(KeySeparator, keyValues.Take(ParentFields.Count));

                if (Lines.ContainsKey(key))
                {
                    Warnings.Add($"Duplicate key '{key}' encountered at line {lineNum}");
                    DupCount++;
                    key += $"[{DupCount}]";
                }

                if (!Index.TryGetValue(parentKey, out var children))
                {
                    children = new Dictionary<string, int>();
                    Index[parentKey] = children;
                }

                children.Add(key, idx++);
                Lines[key] = line;
                LineCount++;
            }
        }

        /// <summary>
        /// Writes every row of <see cref="Data"/> to a CSV file, one record per row.
        /// </summary>
        /// <param name="filePath">Destination file; an existing file is overwritten.</param>
        /// <param name="options">Not read by this method.</param>
        public void SaveCSV(string filePath, Dictionary<string, object> options = null)
        {
            var defaultOpts = new CsvConfiguration(CultureInfo.InvariantCulture)
            {
                HasHeaderRecord = true,
            };

            using (var writer = new System.IO.StreamWriter(filePath))
            using (var csv = new CsvHelper.CsvWriter(writer, defaultOpts))
            {
                // Rows are plain string arrays, so write them field by field; disposing the
                // CsvWriter flushes its buffer to the underlying stream.
                foreach (var row in Data)
                {
                    foreach (var field in row)
                    {
                        csv.WriteField(field);
                    }

                    csv.NextRecord();
                }
            }
        }

        /// <summary>
        /// Converts each row of <see cref="Data"/> into a field-name-to-value dictionary.
        /// </summary>
        /// <returns>
        /// One dictionary per row, keyed by <see cref="FieldNames"/>. Cells missing from a
        /// short row are returned as null.
        /// </returns>
        public List<Dictionary<string, string>> ToHash()
        {
            return Data.Select(row =>
            {
                var dict = new Dictionary<string, string>();
                for (var i = 0; i < FieldNames.Count; i++)
                {
                    dict[FieldNames[i]] = i < row.Length ? row[i] : null;
                }

                return dict;
            }).ToList();
        }

        // Resolves the key/parent/child field specifications to column indexes, then rewrites
        // the specifications as the matching names from FieldNames.
        private void IndexFields()
        {
            KeyFieldIndexes = FindFieldIndexes(KeyFields, FieldNames);
            ParentFieldIndexes = FindFieldIndexes(ParentFields, FieldNames);
            ChildFieldIndexes = FindFieldIndexes(ChildFields, FieldNames);
            KeyFields = KeyFieldIndexes.Select(kf => FieldNames[kf]).ToList();
            ParentFields = ParentFieldIndexes.Select(pf => FieldNames[pf]).ToList();
            ChildFields = ChildFieldIndexes.Select(cf => FieldNames[cf]).ToList();
        }

        // Maps each field specification to a column index. A specification that parses as an
        // integer is used as the index directly; anything else is looked up by name, honouring
        // CaseSensitive. Throws ArgumentException when a name cannot be found.
        private List<int> FindFieldIndexes(List<string> keyFields, List<string> fieldNames)
        {
            var comparison = CaseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;

            return keyFields.Select(field =>
            {
                if (int.TryParse(field, out var fieldIndex))
                {
                    return fieldIndex;
                }

                var namedIndex = fieldNames.FindIndex(fn => fn.Equals(field, comparison));
                if (namedIndex == -1)
                {
                    throw new ArgumentException($"Could not locate field '{field}' in source field names: {string.Join(", ", fieldNames)}");
                }

                return namedIndex;
            }).ToList();
        }

        // Re-keys a filter dictionary by resolved field name. Keys may be column indexes or
        // field names. Returns null when there is no filter or the field names are not known
        // yet. Throws ArgumentException for an unknown field or a null filter value.
        private Dictionary<string, Regex> ConvertFilter(Dictionary<string, Regex> hsh, List<string> fieldNames)
        {
            if (hsh is null || fieldNames is null)
            {
                return null;
            }

            var filter = new Dictionary<string, Regex>();
            foreach (var kvp in hsh)
            {
                var key = kvp.Key;
                var index = int.TryParse(key, out var fieldIndex) ? fieldIndex : fieldNames.IndexOf(key);

                if (index == -1)
                {
                    throw new ArgumentException($"Field '{key}' specified in filter not found in field names: {string.Join(", ", fieldNames)}");
                }

                if (kvp.Value is null)
                {
                    throw new ArgumentException($"Filter for field '{key}' must not be null");
                }

                // Keep the caller's Regex instance so its RegexOptions are preserved.
                filter[fieldNames[index]] = kvp.Value;
            }

            return filter;
        }

        // Checks whether the given filter matches the field value. Supports a literal string
        // (compared according to CaseSensitive), a Regex, or a Func<string, bool> predicate.
        // A null field value never matches a string or Regex filter.
        private bool CheckFilter(object filter, object fieldValue)
        {
            var text = fieldValue?.ToString();

            if (filter is string s)
            {
                return string.Equals(s, text, CaseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase);
            }

            if (filter is Regex regex)
            {
                return text != null && regex.IsMatch(text);
            }

            if (filter is Func<string, bool> func)
            {
                return func(text);
            }

            throw new ArgumentException($"Unsupported filter expression: {filter}");
        }

        // Returns the value as used inside a key: unchanged when CaseSensitive, otherwise
        // upper-cased with the invariant culture so keys do not vary with the current culture.
        private string NormalizeKeyValue(string value)
        {
            return CaseSensitive ? value : value?.ToUpperInvariant();
        }

        // Reads a field list from the options, preferring the singular key (one field) over
        // the plural key (a collection of fields).
        private static List<string> ReadFieldList(Dictionary<string, object> options, string singularKey, string pluralKey)
        {
            if (options.ContainsKey(singularKey))
            {
                return new List<string> { options[singularKey].ToString() };
            }

            return ToStringList(options[pluralKey], pluralKey);
        }

        // Converts an option value into a list of strings. Accepts any enumerable (lists and
        // arrays of any element type); a lone string is treated as a single field name rather
        // than being enumerated character by character.
        private static List<string> ToStringList(object value, string optionName)
        {
            if (value is string single)
            {
                return new List<string> { single };
            }

            if (value is IEnumerable items)
            {
                return items.Cast<object>().Select(item => item.ToString()).ToList();
            }

            throw new ArgumentException($"Option '{optionName}' must be a collection of field names");
        }
    }
305
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc