• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

VolumeGraphics / havocompare / cbb3d470341b379022395dd011fd0a6f21bf72d1

pending completion
cbb3d470341b379022395dd011fd0a6f21bf72d1

push

github

GitHub
Merge pull request #24 from VolumeGraphics/fix-toml-version-0.2.2

2367 of 2572 relevant lines covered (92.03%)

2279.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.2
/src/csv/preprocessing.rs
1
use crate::csv;
2
use crate::csv::value::Value;
3
use crate::csv::Table;
4
use schemars_derive::JsonSchema;
5
use serde::{Deserialize, Serialize};
6
use std::cmp::Ordering::Equal;
7
use tracing::{debug, warn};
8

9
// NOTE(review): this enum derives `JsonSchema`; schemars presumably copies the `///` text below
// into the generated schema descriptions, so rewording those doc comments may change emitted
// schemas - confirm before editing them. Extra explanations are therefore kept in `//` comments.
#[derive(JsonSchema, Deserialize, Serialize, Debug)]
/// Preprocessor options
pub enum Preprocessor {
    // Dispatched to `extract_headers`: if any column does not start with a string value, a
    // warning is logged and the table is left unchanged (the step still succeeds).
    /// Try to extract the headers from the first row - fallible if first row contains a number
    ExtractHeaders,
    // Dispatched to `delete_column_number`: the index is looked up with `columns.get_mut`, and an
    // index that does not exist is ignored without an error.
    /// Replace all fields in column by number by a deleted marker
    DeleteColumnByNumber(usize),
    // Dispatched to `delete_column_name`: only the first matching column is affected, and a name
    // that matches no column is ignored without an error.
    /// Replace all fields in column by name by a deleted marker
    DeleteColumnByName(String),
    // Dispatched to `sort_by_column_name`: the row order comes from `get_permutation`, whose
    // comparator puts larger values first.
    /// Sort rows by column with given name. Fails if no headers were extracted or column name is not found, or if any row has no numbers there
    SortByColumnName(String),
    // Dispatched to `sort_by_column_id`: same ordering as sorting by name.
    /// Sort rows by column with given number. Fails if any row has no numbers there or if out of bounds.
    SortByColumnNumber(usize),
    // Dispatched to `delete_row_by_number`: the row is selected with `rows_mut().nth(..)`, and a
    // row index past the end is ignored without an error.
    /// Replace all fields in row with given number by a deleted marker
    DeleteRowByNumber(usize),
    // Dispatched to `delete_row_by_regex`: the pattern is compiled with `regex::Regex::new` and
    // matched against the `to_string()` form of every field; an invalid pattern is an error.
    /// Replace all fields in row  where at least a single field matches regex by a deleted marker
    DeleteRowByRegex(String),
}
27

28
impl Preprocessor {
29
    pub(crate) fn process(&self, table: &mut Table) -> Result<(), csv::Error> {
72✔
30
        match self {
72✔
31
            Preprocessor::ExtractHeaders => extract_headers(table),
72✔
32
            Preprocessor::DeleteColumnByNumber(id) => delete_column_number(table, *id),
×
33
            Preprocessor::DeleteColumnByName(name) => delete_column_name(table, name.as_str()),
×
34
            Preprocessor::SortByColumnName(name) => sort_by_column_name(table, name.as_str()),
×
35
            Preprocessor::SortByColumnNumber(id) => sort_by_column_id(table, *id),
×
36
            Preprocessor::DeleteRowByNumber(id) => delete_row_by_number(table, *id),
×
37
            Preprocessor::DeleteRowByRegex(regex) => delete_row_by_regex(table, regex),
×
38
        }
39
    }
72✔
40
}
41

42
fn delete_row_by_regex(table: &mut Table, regex: &str) -> Result<(), csv::Error> {
1✔
43
    let regex = regex::Regex::new(regex)?;
1✔
44
    table
1✔
45
        .rows_mut()
1✔
46
        .filter(|row| row.iter().any(|v| regex.is_match(v.to_string().as_str())))
1,027✔
47
        .for_each(|mut row| row.iter_mut().for_each(|v| **v = Value::deleted()));
2✔
48
    Ok(())
1✔
49
}
1✔
50

51
fn delete_row_by_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
52
    if let Some(mut v) = table.rows_mut().nth(id) {
1✔
53
        v.iter_mut().for_each(|v| **v = Value::deleted())
2✔
54
    }
×
55
    Ok(())
1✔
56
}
1✔
57

58
fn get_permutation(rows_to_sort_by: &Vec<f32>) -> permutation::Permutation {
2✔
59
    permutation::sort_by(rows_to_sort_by, |a, b| b.partial_cmp(a).unwrap_or(Equal))
4,228✔
60
}
2✔
61

62
/// Reorders the rows of every column of `table` according to `permutation`.
///
/// The same permutation is applied in place to each column's `rows`, one column after another.
// NOTE(review): presumably every column must have as many rows as the permutation has entries -
// confirm against the `permutation` crate's `apply_slice_in_place` contract.
fn apply_permutation(table: &mut Table, mut permutation: permutation::Permutation) {
    for column in table.columns.iter_mut() {
        permutation.apply_slice_in_place(&mut column.rows);
    }
}
2✔
67

68
fn sort_by_column_id(table: &mut Table, id: usize) -> Result<(), csv::Error> {
3✔
69
    let sort_master_col = table.columns.get(id).ok_or_else(|| {
3✔
70
        csv::Error::InvalidAccess(format!(
1✔
71
            "Column number sorting by id {} requested but column not found.",
1✔
72
            id
1✔
73
        ))
1✔
74
    })?;
3✔
75
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
76
        .rows
2✔
77
        .iter()
2✔
78
        .map(|v| {
515✔
79
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
80
                csv::Error::UnexpectedValue(
1✔
81
                    v.clone(),
1✔
82
                    "Expected quantity while trying to sort by column id".to_string(),
1✔
83
                )
1✔
84
            })
515✔
85
        })
515✔
86
        .collect();
2✔
87
    let permutation = get_permutation(&col_floats?);
2✔
88
    apply_permutation(table, permutation);
1✔
89
    Ok(())
1✔
90
}
3✔
91

92
fn sort_by_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
3✔
93
    let sort_master_col = table
3✔
94
        .columns
3✔
95
        .iter()
3✔
96
        .find(|c| c.header.as_deref().unwrap_or_default() == name)
5✔
97
        .ok_or_else(|| {
3✔
98
            csv::Error::InvalidAccess(format!(
1✔
99
                "Requested format sorting by column'{}' but column not found.",
1✔
100
                name
1✔
101
            ))
1✔
102
        })?;
3✔
103
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
104
        .rows
2✔
105
        .iter()
2✔
106
        .map(|v| {
515✔
107
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
108
                csv::Error::UnexpectedValue(
1✔
109
                    v.clone(),
1✔
110
                    "Expected quantity while trying to sort by column name".to_string(),
1✔
111
                )
1✔
112
            })
515✔
113
        })
515✔
114
        .collect();
2✔
115
    let permutation = get_permutation(&col_floats?);
2✔
116
    apply_permutation(table, permutation);
1✔
117
    Ok(())
1✔
118
}
3✔
119

120
/// Deletes the contents of the first column whose header is `name`.
///
/// Only columns that actually carry a header can match: a column without a header is never
/// selected, not even when `name` is the empty string. When no column matches, the table is
/// left unchanged and the call still returns `Ok(())`.
fn delete_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
    let target = table
        .columns
        .iter_mut()
        // Compare against `Some(name)` so that a missing header is not mistaken for "".
        .find(|col| col.header.as_deref() == Some(name));
    if let Some(column) = target {
        column.delete_contents();
    }
    Ok(())
}
1✔
130

131
/// Deletes the contents of the column at index `id`.
///
/// The column is looked up with `columns.get_mut(id)`. When there is no column at that index,
/// the table is left unchanged and the call still returns `Ok(())`.
fn delete_column_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
    let target = table.columns.get_mut(id);
    if let Some(column) = target {
        column.delete_contents();
    }
    Ok(())
}
1✔
137

138
fn extract_headers(table: &mut Table) -> Result<(), csv::Error> {
77✔
139
    debug!("Extracting headers...");
77✔
140
    let can_extract = table
77✔
141
        .columns
77✔
142
        .iter()
77✔
143
        .all(|c| matches!(c.rows.first(), Some(Value::String(_))));
186✔
144
    if !can_extract {
77✔
145
        warn!("Cannot extract header for this csv!");
×
146
        return Ok(());
×
147
    }
77✔
148

149
    for col in table.columns.iter_mut() {
186✔
150
        let title = col.rows.drain(0..1).next().ok_or_else(|| {
186✔
151
            csv::Error::InvalidAccess("Tried to extract header of empty column!".to_string())
×
152
        })?;
186✔
153
        if let Value::String(title) = title {
186✔
154
            col.header = Some(title);
186✔
155
        }
186✔
156
    }
157
    Ok(())
77✔
158
}
77✔
159

160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv::{Column, Delimiters, Error};
    use std::fs::File;

    /// Loads the histogram fixture, using default delimiters when none are given.
    fn setup_table(delimiters: Option<Delimiters>) -> Table {
        let delimiters = delimiters.unwrap_or_default();
        let fixture = File::open("tests/csv/data/DeviationHistogram.csv").unwrap();
        Table::from_reader(fixture, &delimiters).unwrap()
    }

    /// Returns the header of `column`, panicking if it has none.
    fn header_of(column: &Column) -> &str {
        column.header.as_deref().unwrap()
    }

    /// True when every field of `column` equals the deleted marker value.
    fn is_fully_deleted(column: &Column) -> bool {
        column
            .rows
            .iter()
            .all(|field| *field == csv::Value::deleted())
    }

    /// Asserts that the first field of the first column reads as the deleted marker.
    fn assert_first_field_deleted(table: &Table) {
        let first_field = table.columns.first().unwrap().rows.first().unwrap();
        assert_eq!(first_field.get_string().as_deref().unwrap(), "DELETED");
    }

    /// Asserts that each row's quantity in `column` is not smaller than the next row's.
    fn assert_sorted_descending(table: &Table, column: usize) {
        // Pair every row with its successor; the last row has no partner and is skipped.
        for (row, next_row) in table.rows().zip(table.rows().skip(1)) {
            assert!(
                row.get(column).unwrap().get_quantity().unwrap().value
                    >= next_row.get(column).unwrap().get_quantity().unwrap().value
            );
        }
    }

    #[test]
    fn test_extract_headers() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        assert_eq!(header_of(table.columns.first().unwrap()), "Deviation [mm]");
        assert_eq!(header_of(table.columns.last().unwrap()), "Surface [mm²]");
    }

    #[test]
    fn test_delete_column_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_number(&mut table, 0).unwrap();
        let first_column = table.columns.first().unwrap();
        assert_eq!(header_of(first_column), "DELETED");
        assert!(is_fully_deleted(first_column));
    }

    #[test]
    fn test_delete_column_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_name(&mut table, "Surface [mm²]").unwrap();
        let last_column = table.columns.last().unwrap();
        assert_eq!(header_of(last_column), "DELETED");
        assert!(is_fully_deleted(last_column));
    }

    #[test]
    fn test_delete_row_by_id() {
        let mut table = setup_table(None);
        delete_row_by_number(&mut table, 0).unwrap();
        assert_first_field_deleted(&table);
    }

    #[test]
    fn test_delete_row_by_regex() {
        let mut table = setup_table(None);
        delete_row_by_regex(&mut table, "mm").unwrap();
        assert_first_field_deleted(&table);
    }

    #[test]
    fn test_sort_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        sort_by_column_name(&mut table, "Surface [mm²]").unwrap();
        // The rows are checked at index 1, the position used for this column in the fixture.
        assert_sorted_descending(&table, 1);
    }

    #[test]
    fn test_sort_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        let column = 1;
        sort_by_column_id(&mut table, column).unwrap();
        assert_sorted_descending(&table, column);
    }

    #[test]
    fn sorting_by_mixed_column_fails() {
        // One numeric field followed by a plain string: no sort key exists for the second row.
        let mixed_column = Column {
            header: Some("Field".to_string()),
            rows: vec![
                Value::from_str("1.0", &None),
                Value::String("String-Value".to_string()),
            ],
        };
        let mut table = Table {
            columns: vec![mixed_column],
        };

        let by_name = sort_by_column_name(&mut table, "Field").unwrap_err();
        assert!(matches!(by_name, Error::UnexpectedValue(_, _)));

        let by_id = sort_by_column_id(&mut table, 0).unwrap_err();
        assert!(matches!(by_id, Error::UnexpectedValue(_, _)));
    }

    #[test]
    fn non_existing_table_fails() {
        let mut table = setup_table(None);

        let by_name = sort_by_column_name(&mut table, "Non-Existing-Field").unwrap_err();
        assert!(matches!(by_name, Error::InvalidAccess(_)));

        let by_id = sort_by_column_id(&mut table, 999).unwrap_err();
        assert!(matches!(by_id, Error::InvalidAccess(_)));
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc