• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

VolumeGraphics / havocompare / 39ea91d87e3435e9d42b99df371df972ee344e07

pending completion
39ea91d87e3435e9d42b99df371df972ee344e07

push

github

Adinata Wijaya
add check for row lines of both compared csv files, and throw error if they are unequal

2370 of 2579 relevant lines covered (91.9%)

2274.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.2
/src/csv/preprocessing.rs
1
use crate::csv;
2
use crate::csv::value::Value;
3
use crate::csv::Table;
4
use schemars_derive::JsonSchema;
5
use serde::{Deserialize, Serialize};
6
use std::cmp::Ordering::Equal;
7
use tracing::{debug, warn};
8

9
#[derive(JsonSchema, Deserialize, Serialize, Debug)]
2✔
10
/// Preprocessor options
11
pub enum Preprocessor {
12
    /// Try to extract the headers from the first row - fallible if first row contains a number
13
    ExtractHeaders,
14
    /// Replace all fields in column by number by a deleted marker
15
    DeleteColumnByNumber(usize),
16
    /// Replace all fields in column by name by a deleted marker
17
    DeleteColumnByName(String),
18
    /// Sort rows by column with given name. Fails if no headers were extracted or column name is not found, or if any row has no numbers there
19
    SortByColumnName(String),
20
    /// Sort rows by column with given number. Fails if any row has no numbers there or if out of bounds.
21
    SortByColumnNumber(usize),
22
    /// Replace all fields in row with given number by a deleted marker
23
    DeleteRowByNumber(usize),
24
    /// Replace all fields in row  where at least a single field matches regex by a deleted marker
25
    DeleteRowByRegex(String),
26
}
27

28
impl Preprocessor {
29
    pub(crate) fn process(&self, table: &mut Table) -> Result<(), csv::Error> {
72✔
30
        match self {
72✔
31
            Preprocessor::ExtractHeaders => extract_headers(table),
72✔
32
            Preprocessor::DeleteColumnByNumber(id) => delete_column_number(table, *id),
×
33
            Preprocessor::DeleteColumnByName(name) => delete_column_name(table, name.as_str()),
×
34
            Preprocessor::SortByColumnName(name) => sort_by_column_name(table, name.as_str()),
×
35
            Preprocessor::SortByColumnNumber(id) => sort_by_column_id(table, *id),
×
36
            Preprocessor::DeleteRowByNumber(id) => delete_row_by_number(table, *id),
×
37
            Preprocessor::DeleteRowByRegex(regex) => delete_row_by_regex(table, regex),
×
38
        }
39
    }
72✔
40
}
41

42
fn delete_row_by_regex(table: &mut Table, regex: &str) -> Result<(), csv::Error> {
1✔
43
    let regex = regex::Regex::new(regex)?;
1✔
44
    table
1✔
45
        .rows_mut()
1✔
46
        .filter(|row| row.iter().any(|v| regex.is_match(v.to_string().as_str())))
1,027✔
47
        .for_each(|mut row| row.iter_mut().for_each(|v| **v = Value::deleted()));
2✔
48
    Ok(())
1✔
49
}
1✔
50

51
fn delete_row_by_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
52
    if let Some(mut v) = table.rows_mut().nth(id) {
1✔
53
        v.iter_mut().for_each(|v| **v = Value::deleted())
2✔
54
    }
×
55
    Ok(())
1✔
56
}
1✔
57

58
fn get_permutation(rows_to_sort_by: &Vec<f32>) -> permutation::Permutation {
2✔
59
    permutation::sort_by(rows_to_sort_by, |a, b| b.partial_cmp(a).unwrap_or(Equal))
4,228✔
60
}
2✔
61

62
/// Reorders the rows of every column of `table` according to `permutation`.
///
/// The same permutation is applied to each column in turn, so values that shared a row
/// before the call still share a row afterwards.
fn apply_permutation(table: &mut Table, mut permutation: permutation::Permutation) {
    for column in &mut table.columns {
        permutation.apply_slice_in_place(&mut column.rows);
    }
}
2✔
67

68
fn sort_by_column_id(table: &mut Table, id: usize) -> Result<(), csv::Error> {
3✔
69
    let sort_master_col = table.columns.get(id).ok_or_else(|| {
3✔
70
        csv::Error::InvalidAccess(format!(
1✔
71
            "Column number sorting by id {} requested but column not found.",
1✔
72
            id
1✔
73
        ))
1✔
74
    })?;
3✔
75
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
76
        .rows
2✔
77
        .iter()
2✔
78
        .map(|v| {
515✔
79
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
80
                csv::Error::UnexpectedValue(
1✔
81
                    v.clone(),
1✔
82
                    "Expected quantity while trying to sort by column id".to_string(),
1✔
83
                )
1✔
84
            })
515✔
85
        })
515✔
86
        .collect();
2✔
87
    let permutation = get_permutation(&col_floats?);
2✔
88
    apply_permutation(table, permutation);
1✔
89
    Ok(())
1✔
90
}
3✔
91

92
fn sort_by_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
3✔
93
    let sort_master_col = table
3✔
94
        .columns
3✔
95
        .iter()
3✔
96
        .find(|c| c.header.as_deref().unwrap_or_default() == name)
5✔
97
        .ok_or_else(|| {
3✔
98
            csv::Error::InvalidAccess(format!(
1✔
99
                "Requested format sorting by column'{}' but column not found.",
1✔
100
                name
1✔
101
            ))
1✔
102
        })?;
3✔
103
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
104
        .rows
2✔
105
        .iter()
2✔
106
        .map(|v| {
515✔
107
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
108
                csv::Error::UnexpectedValue(
1✔
109
                    v.clone(),
1✔
110
                    "Expected quantity while trying to sort by column name".to_string(),
1✔
111
                )
1✔
112
            })
515✔
113
        })
515✔
114
        .collect();
2✔
115
    let permutation = get_permutation(&col_floats?);
2✔
116
    apply_permutation(table, permutation);
1✔
117
    Ok(())
1✔
118
}
3✔
119

120
/// Calls `delete_contents()` on the first column whose header equals `name`.
///
/// When no column carries that header the table is left untouched; this is not treated as
/// an error. Columns without a header never match, not even for an empty `name`.
fn delete_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
    let target = table
        .columns
        .iter_mut()
        // Compare against `Some(name)`: mapping a missing header to "" would let an empty
        // `name` wipe a column that has no header at all.
        .find(|col| col.header.as_deref() == Some(name));
    if let Some(c) = target {
        c.delete_contents();
    }
    Ok(())
}
1✔
130

131
/// Calls `delete_contents()` on the column at zero-based index `id`.
///
/// An index past the last column is not an error: the table is left untouched and `Ok(())`
/// is returned.
fn delete_column_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
    let target = table.columns.iter_mut().nth(id);
    if let Some(column) = target {
        column.delete_contents();
    }
    Ok(())
}
1✔
137

138
fn extract_headers(table: &mut Table) -> Result<(), csv::Error> {
77✔
139
    debug!("Extracting headers...");
77✔
140
    let can_extract = table
77✔
141
        .columns
77✔
142
        .iter()
77✔
143
        .all(|c| matches!(c.rows.first(), Some(Value::String(_))));
186✔
144
    if !can_extract {
77✔
145
        warn!("Cannot extract header for this csv!");
×
146
        return Ok(());
×
147
    }
77✔
148

149
    for col in table.columns.iter_mut() {
186✔
150
        let title = col.rows.drain(0..1).next().ok_or_else(|| {
186✔
151
            csv::Error::InvalidAccess("Tried to extract header of empty column!".to_string())
×
152
        })?;
186✔
153
        if let Value::String(title) = title {
186✔
154
            col.header = Some(title);
186✔
155
        }
186✔
156
    }
157
    Ok(())
77✔
158
}
77✔
159

160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv::{Column, Delimiters, Error};
    use std::fs::File;

    /// Loads the deviation histogram fixture, using default delimiters when none are given.
    fn setup_table(delimiters: Option<Delimiters>) -> Table {
        let delimiters = delimiters.unwrap_or_default();
        let fixture = File::open("tests/csv/data/DeviationHistogram.csv").unwrap();
        Table::from_reader(fixture, &delimiters).unwrap()
    }

    /// Returns the header text of `column`, panicking when it has none.
    fn header_of(column: &Column) -> &str {
        column.header.as_deref().unwrap()
    }

    /// True when every row of `column` equals the deleted marker.
    fn is_fully_deleted(column: &Column) -> bool {
        column.rows.iter().all(|value| *value == Value::deleted())
    }

    /// Asserts that the quantities in `column` never increase from one row to the next.
    fn assert_sorted_descending(table: &Table, column: usize) {
        let keys: Vec<_> = table
            .rows()
            .map(|row| row.get(column).unwrap().get_quantity().unwrap().value)
            .collect();
        for pair in keys.windows(2) {
            assert!(pair[0] >= pair[1]);
        }
    }

    #[test]
    fn test_extract_headers() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();

        let first = table.columns.first().unwrap();
        let last = table.columns.last().unwrap();
        assert_eq!(header_of(first), "Deviation [mm]");
        assert_eq!(header_of(last), "Surface [mm²]");
    }

    #[test]
    fn test_delete_column_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_number(&mut table, 0).unwrap();

        let first = table.columns.first().unwrap();
        assert_eq!(header_of(first), "DELETED");
        assert!(is_fully_deleted(first));
    }

    #[test]
    fn test_delete_column_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_name(&mut table, "Surface [mm²]").unwrap();

        let last = table.columns.last().unwrap();
        assert_eq!(header_of(last), "DELETED");
        assert!(is_fully_deleted(last));
    }

    #[test]
    fn test_delete_row_by_id() {
        let mut table = setup_table(None);
        delete_row_by_number(&mut table, 0).unwrap();

        let first_cell = table.columns.first().unwrap().rows.first().unwrap();
        assert_eq!(first_cell.get_string().as_deref().unwrap(), "DELETED");
    }

    #[test]
    fn test_delete_row_by_regex() {
        let mut table = setup_table(None);
        // Headers are not extracted here, so the first row is the header line of the
        // fixture, which contains "mm".
        delete_row_by_regex(&mut table, "mm").unwrap();

        let first_cell = table.columns.first().unwrap().rows.first().unwrap();
        assert_eq!(first_cell.get_string().as_deref().unwrap(), "DELETED");
    }

    #[test]
    fn test_sort_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        sort_by_column_name(&mut table, "Surface [mm²]").unwrap();

        // "Surface [mm²]" is the column at index 1 of the fixture.
        assert_sorted_descending(&table, 1);
    }

    #[test]
    fn test_sort_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        let column = 1;
        sort_by_column_id(&mut table, column).unwrap();

        assert_sorted_descending(&table, column);
    }

    #[test]
    fn sorting_by_mixed_column_fails() {
        // One numeric and one textual field: the column cannot serve as a sort key.
        let mixed = Column {
            header: Some("Field".to_string()),
            rows: vec![
                Value::from_str("1.0", &None),
                Value::String("String-Value".to_string()),
            ],
        };
        let mut table = Table {
            columns: vec![mixed],
        };

        assert!(matches!(
            sort_by_column_name(&mut table, "Field"),
            Err(Error::UnexpectedValue(_, _))
        ));
        assert!(matches!(
            sort_by_column_id(&mut table, 0),
            Err(Error::UnexpectedValue(_, _))
        ));
    }

    #[test]
    fn non_existing_table_fails() {
        let mut table = setup_table(None);

        assert!(matches!(
            sort_by_column_name(&mut table, "Non-Existing-Field"),
            Err(Error::InvalidAccess(_))
        ));
        assert!(matches!(
            sort_by_column_id(&mut table, 999),
            Err(Error::InvalidAccess(_))
        ));
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc