• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

VolumeGraphics / havocompare / d719d36a60313f500e193ffa93d0cdb438fee09d

pending completion
d719d36a60313f500e193ffa93d0cdb438fee09d

push

github

Christopher Regali
Fix a bug where problematic floating-point diffs could not be represented in f32.

2429 of 2633 relevant lines covered (92.25%)

2259.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.17
/src/csv/preprocessing.rs
1
use crate::csv;
2
use crate::csv::value::{FloatType, Value};
3
use crate::csv::Table;
4
use schemars_derive::JsonSchema;
5
use serde::{Deserialize, Serialize};
6
use std::cmp::Ordering::Equal;
7
use tracing::{debug, warn};
8

9
#[derive(JsonSchema, Deserialize, Serialize, Debug)]
2✔
10
/// Preprocessor options
11
pub enum Preprocessor {
12
    /// Try to extract the headers from the first row - fallible if first row contains a number
13
    ExtractHeaders,
14
    /// Replace all fields in column by number by a deleted marker
15
    DeleteColumnByNumber(usize),
16
    /// Replace all fields in column by name by a deleted marker
17
    DeleteColumnByName(String),
18
    /// Sort rows by column with given name. Fails if no headers were extracted or column name is not found, or if any row has no numbers there
19
    SortByColumnName(String),
20
    /// Sort rows by column with given number. Fails if any row has no numbers there or if out of bounds.
21
    SortByColumnNumber(usize),
22
    /// Replace all fields in row with given number by a deleted marker
23
    DeleteRowByNumber(usize),
24
    /// Replace all fields in row  where at least a single field matches regex by a deleted marker
25
    DeleteRowByRegex(String),
26
}
27

28
impl Preprocessor {
29
    pub(crate) fn process(&self, table: &mut Table) -> Result<(), csv::Error> {
72✔
30
        match self {
72✔
31
            Preprocessor::ExtractHeaders => extract_headers(table),
72✔
32
            Preprocessor::DeleteColumnByNumber(id) => delete_column_number(table, *id),
×
33
            Preprocessor::DeleteColumnByName(name) => delete_column_name(table, name.as_str()),
×
34
            Preprocessor::SortByColumnName(name) => sort_by_column_name(table, name.as_str()),
×
35
            Preprocessor::SortByColumnNumber(id) => sort_by_column_id(table, *id),
×
36
            Preprocessor::DeleteRowByNumber(id) => delete_row_by_number(table, *id),
×
37
            Preprocessor::DeleteRowByRegex(regex) => delete_row_by_regex(table, regex),
×
38
        }
39
    }
72✔
40
}
41

42
fn delete_row_by_regex(table: &mut Table, regex: &str) -> Result<(), csv::Error> {
1✔
43
    let regex = regex::Regex::new(regex)?;
1✔
44
    table
1✔
45
        .rows_mut()
1✔
46
        .filter(|row| row.iter().any(|v| regex.is_match(v.to_string().as_str())))
1,027✔
47
        .for_each(|mut row| row.iter_mut().for_each(|v| **v = Value::deleted()));
2✔
48
    Ok(())
1✔
49
}
1✔
50

51
fn delete_row_by_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
52
    if let Some(mut v) = table.rows_mut().nth(id) {
1✔
53
        v.iter_mut().for_each(|v| **v = Value::deleted())
2✔
54
    }
×
55
    Ok(())
1✔
56
}
1✔
57

58
fn get_permutation(rows_to_sort_by: &Vec<FloatType>) -> permutation::Permutation {
2✔
59
    permutation::sort_by(rows_to_sort_by, |a, b| b.partial_cmp(a).unwrap_or(Equal))
4,228✔
60
}
2✔
61

62
/// Reorders the rows of every column of `table` according to `permutation`,
/// so that all columns are rearranged consistently.
fn apply_permutation(table: &mut Table, mut permutation: permutation::Permutation) {
    for column in table.columns.iter_mut() {
        permutation.apply_slice_in_place(&mut column.rows);
    }
}
2✔
67

68
fn sort_by_column_id(table: &mut Table, id: usize) -> Result<(), csv::Error> {
3✔
69
    let sort_master_col = table.columns.get(id).ok_or_else(|| {
3✔
70
        csv::Error::InvalidAccess(format!(
1✔
71
            "Column number sorting by id {id} requested but column not found."
1✔
72
        ))
1✔
73
    })?;
3✔
74
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
75
        .rows
2✔
76
        .iter()
2✔
77
        .map(|v| {
515✔
78
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
79
                csv::Error::UnexpectedValue(
1✔
80
                    v.clone(),
1✔
81
                    "Expected quantity while trying to sort by column id".to_string(),
1✔
82
                )
1✔
83
            })
515✔
84
        })
515✔
85
        .collect();
2✔
86
    let permutation = get_permutation(&col_floats?);
2✔
87
    apply_permutation(table, permutation);
1✔
88
    Ok(())
1✔
89
}
3✔
90

91
fn sort_by_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
3✔
92
    let sort_master_col = table
3✔
93
        .columns
3✔
94
        .iter()
3✔
95
        .find(|c| c.header.as_deref().unwrap_or_default() == name)
5✔
96
        .ok_or_else(|| {
3✔
97
            csv::Error::InvalidAccess(format!(
1✔
98
                "Requested format sorting by column'{name}' but column not found."
1✔
99
            ))
1✔
100
        })?;
3✔
101
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
102
        .rows
2✔
103
        .iter()
2✔
104
        .map(|v| {
515✔
105
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
106
                csv::Error::UnexpectedValue(
1✔
107
                    v.clone(),
1✔
108
                    "Expected quantity while trying to sort by column name".to_string(),
1✔
109
                )
1✔
110
            })
515✔
111
        })
515✔
112
        .collect();
2✔
113
    let permutation = get_permutation(&col_floats?);
2✔
114
    apply_permutation(table, permutation);
1✔
115
    Ok(())
1✔
116
}
3✔
117

118
fn delete_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
119
    if let Some(c) = table
1✔
120
        .columns
1✔
121
        .iter_mut()
1✔
122
        .find(|col| col.header.as_deref().unwrap_or_default() == name)
2✔
123
    {
1✔
124
        c.delete_contents();
1✔
125
    }
1✔
126
    Ok(())
1✔
127
}
1✔
128

129
fn delete_column_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
130
    if let Some(col) = table.columns.get_mut(id) {
1✔
131
        col.delete_contents();
1✔
132
    }
1✔
133
    Ok(())
1✔
134
}
1✔
135

136
fn extract_headers(table: &mut Table) -> Result<(), csv::Error> {
77✔
137
    debug!("Extracting headers...");
77✔
138
    let can_extract = table
77✔
139
        .columns
77✔
140
        .iter()
77✔
141
        .all(|c| matches!(c.rows.first(), Some(Value::String(_))));
186✔
142
    if !can_extract {
77✔
143
        warn!("Cannot extract header for this csv!");
×
144
        return Ok(());
×
145
    }
77✔
146

147
    for col in table.columns.iter_mut() {
186✔
148
        let title = col.rows.drain(0..1).next().ok_or_else(|| {
186✔
149
            csv::Error::InvalidAccess("Tried to extract header of empty column!".to_string())
×
150
        })?;
186✔
151
        if let Value::String(title) = title {
186✔
152
            col.header = Some(title);
186✔
153
        }
186✔
154
    }
155
    Ok(())
77✔
156
}
77✔
157

158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv::{Column, Delimiters, Error};
    use std::fs::File;

    /// Text the tests expect in deleted headers and cells.
    const DELETED_MARKER: &str = "DELETED";

    /// Loads the deviation histogram fixture, using default delimiters when
    /// none are given.
    fn setup_table(delimiters: Option<Delimiters>) -> Table {
        let delimiters = delimiters.unwrap_or_default();
        let fixture = File::open("tests/csv/data/DeviationHistogram.csv").unwrap();
        Table::from_reader(fixture, &delimiters).unwrap()
    }

    /// Header text of `column`; panics when the column has no header.
    fn header_of(column: &Column) -> &str {
        column.header.as_deref().unwrap()
    }

    /// True when every cell of `column` equals the deleted marker value.
    fn is_fully_deleted(column: &Column) -> bool {
        column.rows.iter().all(|cell| *cell == csv::Value::deleted())
    }

    /// Asserts that the very first cell of the table reads as the deleted marker.
    fn assert_first_cell_deleted(table: &Table) {
        let first_cell = table.columns.first().unwrap().rows.first().unwrap();
        assert_eq!(
            first_cell.get_string().as_deref().unwrap(),
            DELETED_MARKER
        );
    }

    /// Asserts that the quantities in `column` never increase from one row to the next.
    fn assert_sorted_descending(table: &Table, column: usize) {
        let mut rows = table.rows().peekable();
        while let Some(current) = rows.next() {
            if let Some(following) = rows.peek() {
                assert!(
                    current.get(column).unwrap().get_quantity().unwrap().value
                        >= following.get(column).unwrap().get_quantity().unwrap().value
                );
            }
        }
    }

    #[test]
    fn test_extract_headers() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();

        let first = table.columns.first().unwrap();
        let last = table.columns.last().unwrap();
        assert_eq!(header_of(first), "Deviation [mm]");
        assert_eq!(header_of(last), "Surface [mm²]");
    }

    #[test]
    fn test_delete_column_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_number(&mut table, 0).unwrap();

        let first = table.columns.first().unwrap();
        assert_eq!(header_of(first), DELETED_MARKER);
        assert!(is_fully_deleted(first));
    }

    #[test]
    fn test_delete_column_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_name(&mut table, "Surface [mm²]").unwrap();

        let last = table.columns.last().unwrap();
        assert_eq!(header_of(last), DELETED_MARKER);
        assert!(is_fully_deleted(last));
    }

    #[test]
    fn test_delete_row_by_id() {
        let mut table = setup_table(None);
        delete_row_by_number(&mut table, 0).unwrap();
        assert_first_cell_deleted(&table);
    }

    #[test]
    fn test_delete_row_by_regex() {
        let mut table = setup_table(None);
        delete_row_by_regex(&mut table, "mm").unwrap();
        assert_first_cell_deleted(&table);
    }

    #[test]
    fn test_sort_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        sort_by_column_name(&mut table, "Surface [mm²]").unwrap();
        assert_sorted_descending(&table, 1);
    }

    #[test]
    fn test_sort_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        let column = 1;
        sort_by_column_id(&mut table, column).unwrap();
        assert_sorted_descending(&table, column);
    }

    #[test]
    fn sorting_by_mixed_column_fails() {
        // One numeric and one textual cell: sorting must reject the column.
        let mixed = Column {
            header: Some("Field".to_string()),
            rows: vec![
                Value::from_str("1.0", &None),
                Value::String("String-Value".to_string()),
            ],
        };
        let mut table = Table {
            columns: vec![mixed],
        };

        let by_name_err = sort_by_column_name(&mut table, "Field").unwrap_err();
        assert!(matches!(by_name_err, Error::UnexpectedValue(_, _)));

        let by_id_err = sort_by_column_id(&mut table, 0).unwrap_err();
        assert!(matches!(by_id_err, Error::UnexpectedValue(_, _)));
    }

    #[test]
    fn non_existing_table_fails() {
        let mut table = setup_table(None);

        let by_name_err = sort_by_column_name(&mut table, "Non-Existing-Field").unwrap_err();
        assert!(matches!(by_name_err, Error::InvalidAccess(_)));

        let by_id_err = sort_by_column_id(&mut table, 999).unwrap_err();
        assert!(matches!(by_id_err, Error::InvalidAccess(_)));
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc