• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

VolumeGraphics / havocompare / 8092875626

29 Feb 2024 08:09AM UTC coverage: 7.015% (-76.8%) from 83.823%
8092875626

push

github

rohdealx
use grcov

8046 of 114694 relevant lines covered (7.02%)

309.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/csv/preprocessing.rs
1
use crate::csv;
2
use crate::csv::value::Value;
3
use crate::csv::Table;
4
use schemars_derive::JsonSchema;
5
use serde::{Deserialize, Serialize};
6
use std::cmp::Ordering::Equal;
7
use tracing::{debug, warn};
8

9
#[derive(JsonSchema, Deserialize, Serialize, Debug, Clone)]
×
10
/// Preprocessor options
11
pub enum Preprocessor {
12
    /// Try to extract the headers from the first row - fallible if first row contains a number
13
    ExtractHeaders,
14
    /// Replace all fields in column by number by a deleted marker
15
    DeleteColumnByNumber(usize),
16
    /// Replace all fields in column by name by a deleted marker
17
    DeleteColumnByName(String),
18
    /// Sort rows by column with given name. Fails if no headers were extracted or column name is not found, or if any row has no numbers there
19
    SortByColumnName(String),
20
    /// Sort rows by column with given number. Fails if any row has no numbers there or if out of bounds.
21
    SortByColumnNumber(usize),
22
    /// Replace all fields in row with given number by a deleted marker
23
    DeleteRowByNumber(usize),
24
    /// Replace all fields in row  where at least a single field matches regex by a deleted marker
25
    DeleteRowByRegex(String),
26
    /// replace found cell using row and column index by a deleted marker
27
    DeleteCellByNumber {
28
        /// column number
29
        column: usize,
30
        /// row number
31
        row: usize,
32
    },
33
    /// replace found cell using column header and row index by a deleted marker
34
    DeleteCellByName {
35
        /// column with given name
36
        column: String,
37
        /// row number
38
        row: usize,
39
    },
40
}
41

42
impl Preprocessor {
43
    pub(crate) fn process(&self, table: &mut Table) -> Result<(), csv::Error> {
×
44
        match self {
×
45
            Preprocessor::ExtractHeaders => extract_headers(table),
×
46
            Preprocessor::DeleteColumnByNumber(id) => delete_column_number(table, *id),
×
47
            Preprocessor::DeleteColumnByName(name) => delete_column_name(table, name.as_str()),
×
48
            Preprocessor::SortByColumnName(name) => sort_by_column_name(table, name.as_str()),
×
49
            Preprocessor::SortByColumnNumber(id) => sort_by_column_id(table, *id),
×
50
            Preprocessor::DeleteRowByNumber(id) => delete_row_by_number(table, *id),
×
51
            Preprocessor::DeleteRowByRegex(regex) => delete_row_by_regex(table, regex),
×
52
            Preprocessor::DeleteCellByNumber { column, row } => {
×
53
                delete_cell_by_number(table, *column, *row)
×
54
            }
55
            Preprocessor::DeleteCellByName { column, row } => {
×
56
                delete_cell_by_column_name_and_row_number(table, column, *row)
×
57
            }
58
        }
59
    }
×
60
}
61

62
/// Overwrites every cell of each row in which at least one cell matches `regex`.
///
/// A cell is tested against the pattern through its `to_string()` rendering; all cells
/// of a matching row are replaced by `Value::deleted()`.
///
/// # Errors
/// Fails if `regex` cannot be compiled into a regular expression.
fn delete_row_by_regex(table: &mut Table, regex: &str) -> Result<(), csv::Error> {
    let pattern = regex::Regex::new(regex)?;
    for mut row in table.rows_mut() {
        let row_matches = row.iter().any(|cell| {
            let rendered = cell.to_string();
            pattern.is_match(rendered.as_str())
        });
        if row_matches {
            for cell in row.iter_mut() {
                **cell = Value::deleted();
            }
        }
    }
    Ok(())
}
×
70

71
fn delete_row_by_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
×
72
    if let Some(mut v) = table.rows_mut().nth(id) {
×
73
        v.iter_mut().for_each(|v| **v = Value::deleted())
×
74
    }
×
75
    Ok(())
×
76
}
×
77

78
fn delete_cell_by_number(table: &mut Table, column: usize, row: usize) -> Result<(), csv::Error> {
×
79
    let value = table
×
80
        .columns
×
81
        .get_mut(column)
×
82
        .ok_or_else(|| {
×
83
            csv::Error::InvalidAccess(format!("Cell with column number {} not found.", column))
×
84
        })?
×
85
        .rows
86
        .get_mut(row)
×
87
        .ok_or_else(|| {
×
88
            csv::Error::InvalidAccess(format!("Cell with row number {} not found.", row))
×
89
        })?;
×
90

91
    *value = Value::deleted();
×
92

×
93
    Ok(())
×
94
}
×
95

96
fn delete_cell_by_column_name_and_row_number(
×
97
    table: &mut Table,
×
98
    column: &str,
×
99
    row: usize,
×
100
) -> Result<(), csv::Error> {
×
101
    let value = table
×
102
        .columns
×
103
        .iter_mut()
×
104
        .find(|col| col.header.as_deref().unwrap_or_default() == column)
×
105
        .ok_or_else(|| {
×
106
            csv::Error::InvalidAccess(format!("Cell with column name '{}' not found.", column))
×
107
        })?
×
108
        .rows
109
        .get_mut(row)
×
110
        .ok_or_else(|| {
×
111
            csv::Error::InvalidAccess(format!("Cell with row number {} not found.", row))
×
112
        })?;
×
113

114
    *value = Value::deleted();
×
115

×
116
    Ok(())
×
117
}
×
118

119
fn get_permutation(rows_to_sort_by: &Vec<f64>) -> permutation::Permutation {
×
120
    permutation::sort_by(rows_to_sort_by, |a, b| b.partial_cmp(a).unwrap_or(Equal))
×
121
}
×
122

123
fn apply_permutation(table: &mut Table, mut permutation: permutation::Permutation) {
×
124
    table.columns.iter_mut().for_each(|c| {
×
125
        permutation.apply_slice_in_place(&mut c.rows);
×
126
    });
×
127
}
×
128

129
fn sort_by_column_id(table: &mut Table, id: usize) -> Result<(), csv::Error> {
×
130
    let sort_master_col = table.columns.get(id).ok_or_else(|| {
×
131
        csv::Error::InvalidAccess(format!(
×
132
            "Column number sorting by id {id} requested but column not found."
×
133
        ))
×
134
    })?;
×
135
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
×
136
        .rows
×
137
        .iter()
×
138
        .map(|v| {
×
139
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
×
140
                csv::Error::UnexpectedValue(
×
141
                    v.clone(),
×
142
                    "Expected quantity while trying to sort by column id".to_string(),
×
143
                )
×
144
            })
×
145
        })
×
146
        .collect();
×
147
    let permutation = get_permutation(&col_floats?);
×
148
    apply_permutation(table, permutation);
×
149
    Ok(())
×
150
}
×
151

152
fn sort_by_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
×
153
    let sort_master_col = table
×
154
        .columns
×
155
        .iter()
×
156
        .find(|c| c.header.as_deref().unwrap_or_default() == name)
×
157
        .ok_or_else(|| {
×
158
            csv::Error::InvalidAccess(format!(
×
159
                "Requested format sorting by column'{name}' but column not found."
×
160
            ))
×
161
        })?;
×
162
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
×
163
        .rows
×
164
        .iter()
×
165
        .map(|v| {
×
166
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
×
167
                csv::Error::UnexpectedValue(
×
168
                    v.clone(),
×
169
                    "Expected quantity while trying to sort by column name".to_string(),
×
170
                )
×
171
            })
×
172
        })
×
173
        .collect();
×
174
    let permutation = get_permutation(&col_floats?);
×
175
    apply_permutation(table, permutation);
×
176
    Ok(())
×
177
}
×
178

179
/// Calls `delete_contents()` on the first column whose header equals `name`.
///
/// If no column carries that header nothing is changed; the function returns
/// `Ok(())` in both cases.
fn delete_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
    if let Some(c) = table
        .columns
        .iter_mut()
        // Compare against `Some(name)` so that a column without any header never
        // matches - previously an empty `name` matched header-less columns.
        .find(|col| col.header.as_deref() == Some(name))
    {
        c.delete_contents();
    }
    Ok(())
}
×
189

190
fn delete_column_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
×
191
    if let Some(col) = table.columns.get_mut(id) {
×
192
        col.delete_contents();
×
193
    }
×
194
    Ok(())
×
195
}
×
196

197
fn extract_headers(table: &mut Table) -> Result<(), csv::Error> {
×
198
    debug!("Extracting headers...");
×
199
    let can_extract = table
×
200
        .columns
×
201
        .iter()
×
202
        .all(|c| matches!(c.rows.first(), Some(Value::String(_))));
×
203
    if !can_extract {
×
204
        warn!("Cannot extract header for this csv!");
×
205
        return Ok(());
×
206
    }
×
207

208
    for col in table.columns.iter_mut() {
×
209
        let title = col.rows.drain(0..1).next().ok_or_else(|| {
×
210
            csv::Error::InvalidAccess("Tried to extract header of empty column!".to_string())
×
211
        })?;
×
212
        if let Value::String(title) = title {
×
213
            col.header = Some(title);
×
214
        }
×
215
    }
216
    Ok(())
×
217
}
×
218

219
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv::{Column, Delimiters, Error};
    use std::fs::File;

    /// Text the tests expect to read back from deleted cells and headers.
    const DELETED: &str = "DELETED";

    /// Parses the CSV file at `path`, using default delimiters when none are given.
    fn load_fixture(path: &str, delimiters: Option<Delimiters>) -> Table {
        let delimiters = delimiters.unwrap_or_default();
        let file = File::open(path).unwrap();
        Table::from_reader(file, &delimiters).unwrap()
    }

    fn setup_table(delimiters: Option<Delimiters>) -> Table {
        load_fixture("tests/csv/data/DeviationHistogram.csv", delimiters)
    }

    fn setup_table_two(delimiters: Option<Delimiters>) -> Table {
        load_fixture("tests/csv/data/defects_headers.csv", delimiters)
    }

    /// Header of the first column; panics if the column or its header is missing.
    fn first_header(table: &Table) -> &str {
        table.columns.first().unwrap().header.as_deref().unwrap()
    }

    /// Header of the last column; panics if the column or its header is missing.
    fn last_header(table: &Table) -> &str {
        table.columns.last().unwrap().header.as_deref().unwrap()
    }

    /// Cell at (`column`, `row`); panics if either index is out of bounds.
    fn cell(table: &Table, column: usize, row: usize) -> &Value {
        table.columns.get(column).unwrap().rows.get(row).unwrap()
    }

    /// True if every cell of `column` equals the deleted marker.
    fn all_cells_deleted(column: &Column) -> bool {
        column.rows.iter().all(|v| *v == Value::deleted())
    }

    /// Asserts that the quantities in `column` never increase from one row to the next.
    fn assert_rows_descending(table: &Table, column: usize) {
        let mut peekable_rows = table.rows().peekable();
        while let Some(row) = peekable_rows.next() {
            if let Some(next_row) = peekable_rows.peek() {
                assert!(
                    row.get(column).unwrap().get_quantity().unwrap().value
                        >= next_row.get(column).unwrap().get_quantity().unwrap().value
                );
            }
        }
    }

    #[test]
    fn test_extract_headers_two() {
        let mut table = setup_table_two(None);
        extract_headers(&mut table).unwrap();
        assert_eq!(first_header(&table), "Entry");
        assert_eq!(last_header(&table), "Radius");
    }

    #[test]
    fn test_extract_headers() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        assert_eq!(first_header(&table), "Deviation [mm]");
        assert_eq!(last_header(&table), "Surface [mm²]");
    }

    #[test]
    fn test_delete_column_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_number(&mut table, 0).unwrap();
        assert_eq!(first_header(&table), DELETED);
        assert!(all_cells_deleted(table.columns.first().unwrap()));
    }

    #[test]
    fn test_delete_column_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_name(&mut table, "Surface [mm²]").unwrap();
        assert_eq!(last_header(&table), DELETED);
        assert!(all_cells_deleted(table.columns.last().unwrap()));
    }

    #[test]
    fn test_delete_row_by_id() {
        let mut table = setup_table(None);
        delete_row_by_number(&mut table, 0).unwrap();
        assert_eq!(
            cell(&table, 0, 0).get_string().as_deref().unwrap(),
            DELETED
        );
    }

    #[test]
    fn test_delete_row_by_regex() {
        let mut table = setup_table(None);
        delete_row_by_regex(&mut table, "mm").unwrap();
        assert_eq!(
            cell(&table, 0, 0).get_string().as_deref().unwrap(),
            DELETED
        );
    }

    #[test]
    fn test_sort_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        sort_by_column_name(&mut table, "Surface [mm²]").unwrap();
        assert_rows_descending(&table, 1);
    }

    #[test]
    fn test_sort_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        let column = 1;
        sort_by_column_id(&mut table, column).unwrap();
        assert_rows_descending(&table, column);
    }

    #[test]
    fn sorting_by_mixed_column_fails() {
        // One numeric and one textual cell: neither sort variant may accept this column.
        let mixed = Column {
            header: Some("Field".to_string()),
            rows: vec![
                Value::from_str("1.0", &None),
                Value::String("String-Value".to_string()),
            ],
        };
        let mut table = Table {
            columns: vec![mixed],
        };

        let order_by_name = sort_by_column_name(&mut table, "Field");
        assert!(matches!(
            order_by_name.unwrap_err(),
            Error::UnexpectedValue(_, _)
        ));

        let order_by_id = sort_by_column_id(&mut table, 0);
        assert!(matches!(
            order_by_id.unwrap_err(),
            Error::UnexpectedValue(_, _)
        ));
    }

    #[test]
    fn non_existing_table_fails() {
        let mut table = setup_table(None);

        let order_by_name = sort_by_column_name(&mut table, "Non-Existing-Field");
        assert!(matches!(
            order_by_name.unwrap_err(),
            Error::InvalidAccess(_)
        ));

        let order_by_id = sort_by_column_id(&mut table, 999);
        assert!(matches!(order_by_id.unwrap_err(), Error::InvalidAccess(_)));
    }

    #[test]
    fn test_delete_cell_by_numb() {
        let mut table = setup_table(None);
        delete_cell_by_number(&mut table, 1, 2).unwrap();

        // The addressed cell is deleted ...
        assert_eq!(
            cell(&table, 1, 2).get_string().as_deref().unwrap(),
            DELETED
        );

        // ... while the first cell of the same column is still a different string ...
        assert_ne!(
            cell(&table, 1, 0).get_string().as_deref().unwrap(),
            DELETED
        );

        // ... and a cell in the neighbouring column still holds no string.
        assert_eq!(cell(&table, 0, 1).get_string(), None);
    }

    #[test]
    fn test_delete_cell_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_cell_by_column_name_and_row_number(&mut table, "Surface [mm²]", 1).unwrap();

        // The addressed cell is deleted ...
        assert_eq!(
            cell(&table, 1, 1).get_string().as_deref().unwrap(),
            DELETED
        );

        // ... while other cells of that column and of the neighbouring column hold no string.
        assert_eq!(cell(&table, 1, 3).get_string(), None);
        assert_eq!(cell(&table, 0, 1).get_string(), None);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc