• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

WillemOpperman / csv-diff-dotnet / 8685582570

15 Apr 2024 08:05AM UTC coverage: 77.354% (+1.1%) from 76.233%
8685582570

push

github

web-flow
Merge pull request #57 from robbery/wip-speedup

speedup

345 of 446 relevant lines covered (77.35%)

28.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.68
csv-diff/Algorithm.cs
1
using System;
2
using System.Collections.Generic;
3
using System.Linq;
4
using csv_diff.Interfaces;
5

6
namespace csv_diff
7
{
8
    // Implements the CSV diff algorithm.
9
    public class Algorithm
10
    {
11
        /// <summary>
        /// Diffs two CSV sources and returns the detected changes keyed by row key.
        /// </summary>
        /// <param name="left">Source holding the original ("before") rows.</param>
        /// <param name="right">Source holding the new ("after") rows.</param>
        /// <param name="keyFields">Names of the fields that identify a row.</param>
        /// <param name="diffFields">
        /// Names of the fields to compare; any that are also key fields are ignored.
        /// </param>
        /// <param name="options">
        /// Optional settings; may be <c>null</c>. Recognised keys are the boolean
        /// flags <c>ignore_adds</c>, <c>ignore_moves</c>, <c>ignore_updates</c> and
        /// <c>ignore_deletes</c>, plus <c>equality_procs</c>, a map from field name
        /// to a custom equality function.
        /// </param>
        /// <returns>
        /// A dictionary from row key to the <c>Diff</c> ("delete", "add", "update"
        /// or "move") recorded for that row.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The sources disagree on case-sensitivity or on the number of parent fields.
        /// </exception>
        public Dictionary<string, Diff> DiffSources(
            ISource left,
            ISource right,
            string[] keyFields,
            string[] diffFields,
            IDictionary<string, object> options = null)
        {
            if (left == null)
            {
                throw new ArgumentNullException(nameof(left));
            }

            if (right == null)
            {
                throw new ArgumentNullException(nameof(right));
            }

            if (left.CaseSensitive != right.CaseSensitive)
            {
                throw new ArgumentException("Left and right must have the same settings for case-sensitivity");
            }

            if (left.ParentFields.Count != right.ParentFields.Count)
            {
                throw new ArgumentException("Left and right must have the same settings for parent/child fields");
            }

            // Ensure key fields are not also in the diff fields
            var diffFieldSet = new HashSet<string>(diffFields);
            var diffFieldsNoKeys = diffFieldSet.Except(keyFields).ToArray();

            var leftIndex = left.Index;
            var leftValues = left.Lines;
            var leftKeys = leftValues.Keys;
            var rightIndex = right.Index;
            var rightValues = right.Lines;
            var rightKeys = rightValues.Keys;
            var parentFieldCount = left.ParentFields.Count;

            // A change type is included unless its "ignore_*" flag is present and
            // true. options is optional, so a null dictionary means "include all".
            bool IsIncluded(string ignoreOption)
            {
                return !(options != null
                         && options.TryGetValue(ignoreOption, out var ignore)
                         && (bool)ignore);
            }

            var includeAdds = IsIncluded("ignore_adds");
            var includeMoves = IsIncluded("ignore_moves");
            var includeUpdates = IsIncluded("ignore_updates");
            var includeDeletes = IsIncluded("ignore_deletes");

            var caseSensitive = left.CaseSensitive;

            // Custom per-field equality functions; fall back to an empty map when
            // the option is absent or explicitly null.
            IDictionary<string, Func<object, object, bool>> equalityProcs = null;
            if (options != null && options.TryGetValue("equality_procs", out var procs))
            {
                equalityProcs = (IDictionary<string, Func<object, object, bool>>)procs;
            }

            equalityProcs = equalityProcs ?? new Dictionary<string, Func<object, object, bool>>();

            var diffs = new Dictionary<string, Diff>();

            // Deleted keys grouped by their child part, so that a later "add" with
            // the same child part can be recognised as a change of parent.
            var potentialMoves = new Dictionary<string, List<string>>();

            // First identify deletions
            if (includeDeletes)
            {
                foreach (var key in leftKeys.Except(rightKeys))
                {
                    // Keys are '~'-joined: the first parentFieldCount components
                    // form the parent key, the remainder the child key.
                    var keyVals = key.Split('~');
                    var parent = string.Join("~", keyVals.Take(parentFieldCount));
                    var child = string.Join("~", keyVals.Skip(parentFieldCount));
                    var leftParent = leftIndex[parent];
                    var leftValue = leftValues[key];

                    var rowIdx = leftKeys.IndexOf(key);
                    var sibIdx = leftParent[key];
                    if (sibIdx < 0)
                    {
                        throw new Exception($"Can't locate key {key} in parent {parent}");
                    }

                    diffs[key] = new Diff("delete", leftValue, rowIdx, sibIdx);

                    if (!potentialMoves.TryGetValue(child, out var deletedKeys))
                    {
                        deletedKeys = new List<string>();
                        potentialMoves[child] = deletedKeys;
                    }

                    deletedKeys.Add(key);
                }
            }

            // Now identify adds/updates
            foreach (var key in rightKeys)
            {
                var keyVals = key.Split('~');
                var parent = string.Join("~", keyVals.Take(parentFieldCount));
                var leftParent = leftIndex.ContainsKey(parent) ? leftIndex[parent] : null;
                var rightParent = rightIndex[parent];
                var leftValue = leftValues.ContainsKey(key) ? leftValues[key] : null;
                var rightValue = rightValues[key];

                // Sibling positions come from the per-parent index lookups below.
                var keyInLeftParent = leftParent != null && leftParent.ContainsKey(key);
                var keyInRightParent = rightParent.ContainsKey(key);
                if (keyInLeftParent && keyInRightParent)
                {
                    // Present on both sides: may be an update, a move, or both.
                    var leftIdx = leftParent?[key] ?? -1;
                    var rightIdx = rightParent[key];
                    if (includeUpdates && diffFieldsNoKeys.Length > 0)
                    {
                        var changes = DiffRow(leftValue, rightValue, diffFieldsNoKeys, caseSensitive, equalityProcs);
                        if (changes.Count > 0)
                        {
                            var id = IdFields(keyFields, rightValue);

                            // NOTE(review): the sibling index is passed for both the row
                            // and sibling position arguments - confirm this is intended.
                            diffs[key] = new Diff("update", id.Union(changes).ToDictionary(x => x.Key, x => x.Value), rightIdx, rightIdx);
                        }
                    }

                    if (includeMoves && leftIdx >= 0 && rightIdx >= 0 && leftIdx != rightIdx)
                    {
                        // Move
                        if (diffs.TryGetValue(key, out var d))
                        {
                            // Already recorded as an update: attach the old and new positions.
                            d.SiblingPosition = new List<int> { leftIdx, rightIdx };
                        }
                        else
                        {
                            var id = IdFields(keyFields, rightValue);
                            diffs[key] = new Diff("move", id, rightIdx, new List<int> { leftIdx, rightIdx });
                        }
                    }
                }
                else if (keyInRightParent)
                {
                    var rightIdx = rightParent[key];

                    // Add
                    var child = string.Join("~", keyVals.Skip(parentFieldCount));
                    if (potentialMoves.TryGetValue(child, out var potentialMovesList) && potentialMovesList.Count > 0)
                    {
                        // A row with the same child key was deleted under another parent:
                        // drop that delete and, if updates are wanted, report an update instead.
                        var oldKey = potentialMovesList[potentialMovesList.Count - 1];
                        potentialMovesList.RemoveAt(potentialMovesList.Count - 1);
                        diffs.Remove(oldKey);
                        if (includeUpdates && diffFieldsNoKeys.Length > 0)
                        {
                            leftValue = leftValues[oldKey];
                            var id = IdFields(right.ChildFields.ToArray(), rightValue);
                            var changes = DiffRow(leftValue, rightValue, left.ParentFields.Concat(diffFieldsNoKeys).ToArray(), caseSensitive, equalityProcs);

                            diffs[key] = new Diff("update", id.Union(changes).ToDictionary(x => x.Key, x => x.Value), rightIdx, rightIdx);
                        }
                    }
                    else if (includeAdds)
                    {
                        diffs[key] = new Diff("add", rightValue, rightIdx, rightIdx);
                    }
                }
            }

            return diffs;
        }
153

154
        /// <summary>
        /// Identifies the fields that differ between two versions of the same row.
        /// </summary>
        /// <param name="leftRow">Field values of the original row.</param>
        /// <param name="rightRow">Field values of the new row.</param>
        /// <param name="fields">Names of the fields to compare.</param>
        /// <param name="caseSensitive">
        /// When <c>true</c> values are compared with ordinal comparison; otherwise
        /// with ordinal, case-insensitive comparison.
        /// </param>
        /// <param name="equalityProcs">
        /// Optional custom equality functions keyed by field name; may be <c>null</c>.
        /// A field with a non-null entry is compared with that function instead of
        /// the string comparison.
        /// </param>
        /// <returns>
        /// A dictionary with one entry per differing field, whose value is a
        /// two-element <c>object[]</c> holding the left and right values.
        /// </returns>
        private IDictionary<string, object> DiffRow(
            IDictionary<string, object> leftRow,
            IDictionary<string, object> rightRow,
            string[] fields,
            bool caseSensitive,
            IDictionary<string, Func<object, object, bool>> equalityProcs)
        {
            // The comparison mode is the same for every field, so pick it once.
            var comparison = caseSensitive
                ? StringComparison.Ordinal
                : StringComparison.OrdinalIgnoreCase;

            var diffs = new Dictionary<string, object>();
            foreach (var attr in fields)
            {
                // A field missing from a row is treated as null.
                rightRow.TryGetValue(attr, out var rightVal);
                leftRow.TryGetValue(attr, out var leftVal);

                Func<object, object, bool> eqProc = null;
                if (equalityProcs != null)
                {
                    equalityProcs.TryGetValue(attr, out eqProc);
                }

                // Without a custom function, values are compared by their string
                // form; two nulls compare equal.
                var same = eqProc != null
                    ? eqProc(leftVal, rightVal)
                    : string.Equals(leftVal?.ToString(), rightVal?.ToString(), comparison);

                if (!same)
                {
                    diffs[attr] = new object[] { leftVal, rightVal };
                }
            }

            return diffs;
        }
198

199
        // Builds the identifying subset of a row: for each name in keyFields
        // that is present in fields, copies that name and its value into a new
        // dictionary. Names missing from fields are skipped.
        private Dictionary<string, object> IdFields(string[] keyFields, IDictionary<string, object> fields)
        {
            var result = new Dictionary<string, object>();
            for (var i = 0; i < keyFields.Length; i++)
            {
                var name = keyFields[i];
                if (!fields.TryGetValue(name, out var fieldValue))
                {
                    // Key field not present in this row - nothing to copy.
                    continue;
                }

                result[name] = fieldValue;
            }

            return result;
        }
213
    }
214
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc