• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

VolumeGraphics / havocompare / 15018908523

14 May 2025 10:54AM UTC coverage: 81.788% (-2.2%) from 83.973%
15018908523

push

github

web-flow
Merge pull request #54 from VolumeGraphics/file-exist-checker

File exist checker

159 of 244 new or added lines in 4 files covered. (65.16%)

18 existing lines in 5 files now uncovered.

2735 of 3344 relevant lines covered (81.79%)

2724.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.61
/src/csv/preprocessing.rs
1
use crate::csv;
2
use crate::csv::value::Value;
3
use crate::csv::Table;
4
use schemars_derive::JsonSchema;
5
use serde::{Deserialize, Serialize};
6
use std::cmp::Ordering::Equal;
7
use tracing::{debug, warn};
8

9
#[derive(JsonSchema, Deserialize, Serialize, Debug, Clone)]
10
/// Preprocessing steps that can be applied to a CSV table before it is compared
11
pub enum Preprocessor {
12
    /// Try to extract the headers from the first row. If the first value of any column is not a string, a warning is logged and the table is left unchanged
13
    ExtractHeaders,
14
    /// Replace all fields in the column with the given zero-based number by a deleted marker. Does nothing if the column does not exist
15
    DeleteColumnByNumber(usize),
16
    /// Replace all fields in the column with the given name by a deleted marker. Does nothing if no column has that name
17
    DeleteColumnByName(String),
18
    /// Sort rows in descending order by the column with the given name. Fails if no headers were extracted, if the column name is not found, or if any row has no number there
19
    SortByColumnName(String),
20
    /// Sort rows in descending order by the column with the given zero-based number. Fails if the number is out of bounds or if any row has no number there
21
    SortByColumnNumber(usize),
22
    /// Replace all fields in the row with the given zero-based number by a deleted marker. Does nothing if the row does not exist
23
    DeleteRowByNumber(usize),
24
    /// Replace all fields in every row where at least one field matches the regex by a deleted marker. Fails if the regex is invalid
25
    DeleteRowByRegex(String),
26
    /// Replace the cell addressed by column and row number by a deleted marker. Fails if the column or the row does not exist
27
    DeleteCellByNumber {
28
        /// Zero-based column number
29
        column: usize,
30
        /// Zero-based row number
31
        row: usize,
32
    },
33
    /// Replace the cell addressed by column header and row number by a deleted marker. Fails if the column or the row does not exist
34
    DeleteCellByName {
35
        /// Header of the column
36
        column: String,
37
        /// Zero-based row number
38
        row: usize,
39
    },
40
}
41

42
impl Preprocessor {
43
    pub(crate) fn process(&self, table: &mut Table) -> Result<(), csv::Error> {
146✔
44
        match self {
146✔
45
            Preprocessor::ExtractHeaders => extract_headers(table),
146✔
46
            Preprocessor::DeleteColumnByNumber(id) => delete_column_number(table, *id),
×
47
            Preprocessor::DeleteColumnByName(name) => delete_column_name(table, name.as_str()),
×
48
            Preprocessor::SortByColumnName(name) => sort_by_column_name(table, name.as_str()),
×
49
            Preprocessor::SortByColumnNumber(id) => sort_by_column_id(table, *id),
×
50
            Preprocessor::DeleteRowByNumber(id) => delete_row_by_number(table, *id),
×
51
            Preprocessor::DeleteRowByRegex(regex) => delete_row_by_regex(table, regex),
×
52
            Preprocessor::DeleteCellByNumber { column, row } => {
×
53
                delete_cell_by_number(table, *column, *row)
×
54
            }
55
            Preprocessor::DeleteCellByName { column, row } => {
×
56
                delete_cell_by_column_name_and_row_number(table, column, *row)
×
57
            }
58
        }
59
    }
146✔
60
}
61

62
fn delete_row_by_regex(table: &mut Table, regex: &str) -> Result<(), csv::Error> {
1✔
63
    let regex = regex::Regex::new(regex)?;
1✔
64
    table
1✔
65
        .rows_mut()
1✔
66
        .filter(|row| row.iter().any(|v| regex.is_match(v.to_string().as_str())))
1,027✔
67
        .for_each(|mut row| row.iter_mut().for_each(|v| **v = Value::deleted()));
2✔
68
    Ok(())
1✔
69
}
1✔
70

71
fn delete_row_by_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
1✔
72
    if let Some(mut v) = table.rows_mut().nth(id) {
1✔
73
        v.iter_mut().for_each(|v| **v = Value::deleted())
2✔
74
    }
×
75
    Ok(())
1✔
76
}
1✔
77

78
fn delete_cell_by_number(table: &mut Table, column: usize, row: usize) -> Result<(), csv::Error> {
1✔
79
    let value = table
1✔
80
        .columns
1✔
81
        .get_mut(column)
1✔
82
        .ok_or_else(|| {
1✔
83
            csv::Error::InvalidAccess(format!("Cell with column number {} not found.", column))
×
UNCOV
84
        })?
×
85
        .rows
86
        .get_mut(row)
1✔
87
        .ok_or_else(|| {
1✔
88
            csv::Error::InvalidAccess(format!("Cell with row number {} not found.", row))
×
UNCOV
89
        })?;
×
90

91
    *value = Value::deleted();
1✔
92

93
    Ok(())
1✔
94
}
1✔
95

96
fn delete_cell_by_column_name_and_row_number(
1✔
97
    table: &mut Table,
1✔
98
    column: &str,
1✔
99
    row: usize,
1✔
100
) -> Result<(), csv::Error> {
1✔
101
    let value = table
1✔
102
        .columns
1✔
103
        .iter_mut()
1✔
104
        .find(|col| col.header.as_deref().unwrap_or_default() == column)
2✔
105
        .ok_or_else(|| {
1✔
106
            csv::Error::InvalidAccess(format!("Cell with column name '{}' not found.", column))
×
UNCOV
107
        })?
×
108
        .rows
109
        .get_mut(row)
1✔
110
        .ok_or_else(|| {
1✔
111
            csv::Error::InvalidAccess(format!("Cell with row number {} not found.", row))
×
UNCOV
112
        })?;
×
113

114
    *value = Value::deleted();
1✔
115

116
    Ok(())
1✔
117
}
1✔
118

119
fn get_permutation(rows_to_sort_by: &Vec<f64>) -> permutation::Permutation {
2✔
120
    permutation::sort_by(rows_to_sort_by, |a, b| b.partial_cmp(a).unwrap_or(Equal))
6,862✔
121
}
2✔
122

123
/// Reorders the rows of every column of `table` in place according to `permutation`, so that
/// all columns are rearranged identically.
fn apply_permutation(table: &mut Table, mut permutation: permutation::Permutation) {
    for column in table.columns.iter_mut() {
        permutation.apply_slice_in_place(&mut column.rows);
    }
}
2✔
128

129
fn sort_by_column_id(table: &mut Table, id: usize) -> Result<(), csv::Error> {
3✔
130
    let sort_master_col = table.columns.get(id).ok_or_else(|| {
3✔
131
        csv::Error::InvalidAccess(format!(
1✔
132
            "Column number sorting by id {id} requested but column not found."
1✔
133
        ))
1✔
134
    })?;
1✔
135
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
136
        .rows
2✔
137
        .iter()
2✔
138
        .map(|v| {
515✔
139
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
140
                csv::Error::UnexpectedValue(
1✔
141
                    v.clone(),
1✔
142
                    "Expected quantity while trying to sort by column id".to_string(),
1✔
143
                )
1✔
144
            })
1✔
145
        })
515✔
146
        .collect();
2✔
147
    let permutation = get_permutation(&col_floats?);
2✔
148
    apply_permutation(table, permutation);
1✔
149
    Ok(())
1✔
150
}
3✔
151

152
fn sort_by_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
3✔
153
    let sort_master_col = table
3✔
154
        .columns
3✔
155
        .iter()
3✔
156
        .find(|c| c.header.as_deref().unwrap_or_default() == name)
5✔
157
        .ok_or_else(|| {
3✔
158
            csv::Error::InvalidAccess(format!(
1✔
159
                "Requested format sorting by column'{name}' but column not found."
1✔
160
            ))
1✔
161
        })?;
1✔
162
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
163
        .rows
2✔
164
        .iter()
2✔
165
        .map(|v| {
515✔
166
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
167
                csv::Error::UnexpectedValue(
1✔
168
                    v.clone(),
1✔
169
                    "Expected quantity while trying to sort by column name".to_string(),
1✔
170
                )
1✔
171
            })
1✔
172
        })
515✔
173
        .collect();
2✔
174
    let permutation = get_permutation(&col_floats?);
2✔
175
    apply_permutation(table, permutation);
1✔
176
    Ok(())
1✔
177
}
3✔
178

179
/// Deletes the contents of the first column whose header equals `name`.
///
/// Only columns that actually carry a header are considered, so an empty `name` no longer
/// selects a header-less column. If no column matches, the table is left untouched; this
/// function always returns `Ok(())`.
fn delete_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
    if let Some(c) = table
        .columns
        .iter_mut()
        // Compare against `Some(name)`: substituting "" for a missing header would let an empty
        // name wipe a header-less column.
        .find(|col| col.header.as_deref() == Some(name))
    {
        c.delete_contents();
    }
    Ok(())
}
1✔
189

190
/// Deletes the contents of the column at zero-based index `id`.
///
/// An out-of-range index is not an error: the table is left untouched and `Ok(())` is returned.
fn delete_column_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
    table.columns.get_mut(id).into_iter().for_each(|column| {
        column.delete_contents();
    });
    Ok(())
}
1✔
196

197
fn extract_headers(table: &mut Table) -> Result<(), csv::Error> {
153✔
198
    debug!("Extracting headers...");
153✔
199
    let can_extract = table
153✔
200
        .columns
153✔
201
        .iter()
153✔
202
        .all(|c| matches!(c.rows.first(), Some(Value::String(_))));
392✔
203
    if !can_extract {
153✔
204
        warn!("Cannot extract header for this csv!");
×
205
        return Ok(());
×
206
    }
153✔
207

208
    for col in table.columns.iter_mut() {
392✔
209
        let title = col.rows.drain(0..1).next().ok_or_else(|| {
392✔
210
            csv::Error::InvalidAccess("Tried to extract header of empty column!".to_string())
×
UNCOV
211
        })?;
×
212
        if let Value::String(title) = title {
392✔
213
            col.header = Some(title);
392✔
214
        }
392✔
215
    }
216
    Ok(())
153✔
217
}
153✔
218

219
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv::{Column, Delimiters, Error};
    use std::fs::File;

    /// Reads the CSV fixture at `path`, using default delimiters when none are given.
    fn load_table(path: &str, delimiters: Option<Delimiters>) -> Table {
        let delimiters = delimiters.unwrap_or_default();
        Table::from_reader(File::open(path).unwrap(), &delimiters).unwrap()
    }

    fn setup_table(delimiters: Option<Delimiters>) -> Table {
        load_table("tests/csv/data/DeviationHistogram.csv", delimiters)
    }

    fn setup_table_two(delimiters: Option<Delimiters>) -> Table {
        load_table("tests/csv/data/defects_headers.csv", delimiters)
    }

    /// Header of the first column; panics if the table is empty or the column has no header.
    fn first_header(table: &Table) -> &str {
        table.columns.first().unwrap().header.as_deref().unwrap()
    }

    /// Header of the last column; panics if the table is empty or the column has no header.
    fn last_header(table: &Table) -> &str {
        table.columns.last().unwrap().header.as_deref().unwrap()
    }

    /// String content of the cell at (`column`, `row`), or `None` if the cell holds no string.
    /// Panics if the cell does not exist.
    fn cell_text(table: &Table, column: usize, row: usize) -> Option<String> {
        table
            .columns
            .get(column)
            .unwrap()
            .rows
            .get(row)
            .unwrap()
            .get_string()
            .map(|text| text.to_string())
    }

    /// Asserts that the quantities in `column` never increase from one row to the next.
    fn assert_sorted_descending(table: &Table, column: usize) {
        let values: Vec<f64> = table
            .rows()
            .map(|row| row.get(column).unwrap().get_quantity().unwrap().value)
            .collect();
        assert!(values.windows(2).all(|pair| pair[0] >= pair[1]));
    }

    #[test]
    fn test_extract_headers_two() {
        let mut table = setup_table_two(None);
        extract_headers(&mut table).unwrap();
        assert_eq!(first_header(&table), "Entry");
        assert_eq!(last_header(&table), "Radius");
    }

    #[test]
    fn test_extract_headers() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        assert_eq!(first_header(&table), "Deviation [mm]");
        assert_eq!(last_header(&table), "Surface [mm²]");
    }

    #[test]
    fn test_delete_column_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_number(&mut table, 0).unwrap();
        assert_eq!(first_header(&table), "DELETED");
        assert!(table
            .columns
            .first()
            .unwrap()
            .rows
            .iter()
            .all(|v| *v == Value::deleted()));
    }

    #[test]
    fn test_delete_column_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_name(&mut table, "Surface [mm²]").unwrap();
        assert_eq!(last_header(&table), "DELETED");
        assert!(table
            .columns
            .last()
            .unwrap()
            .rows
            .iter()
            .all(|v| *v == Value::deleted()));
    }

    #[test]
    fn test_delete_row_by_id() {
        let mut table = setup_table(None);
        delete_row_by_number(&mut table, 0).unwrap();
        assert_eq!(cell_text(&table, 0, 0).as_deref(), Some("DELETED"));
    }

    #[test]
    fn test_delete_row_by_regex() {
        let mut table = setup_table(None);
        delete_row_by_regex(&mut table, "mm").unwrap();
        assert_eq!(cell_text(&table, 0, 0).as_deref(), Some("DELETED"));
    }

    #[test]
    fn test_sort_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        sort_by_column_name(&mut table, "Surface [mm²]").unwrap();
        assert_sorted_descending(&table, 1);
    }

    #[test]
    fn test_sort_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        let column = 1;
        sort_by_column_id(&mut table, column).unwrap();
        assert_sorted_descending(&table, column);
    }

    #[test]
    fn sorting_by_mixed_column_fails() {
        let column = Column {
            header: Some("Field".to_string()),
            rows: vec![
                Value::from_str("1.0", &None),
                Value::String("String-Value".to_string()),
            ],
        };
        let mut table = Table {
            columns: vec![column],
        };
        let order_by_name = sort_by_column_name(&mut table, "Field");
        assert!(matches!(
            order_by_name.unwrap_err(),
            Error::UnexpectedValue(_, _)
        ));

        let order_by_id = sort_by_column_id(&mut table, 0);
        assert!(matches!(
            order_by_id.unwrap_err(),
            Error::UnexpectedValue(_, _)
        ));
    }

    // Despite its name, this test exercises sorting by a column that does not exist.
    #[test]
    fn non_existing_table_fails() {
        let mut table = setup_table(None);
        let order_by_name = sort_by_column_name(&mut table, "Non-Existing-Field");
        assert!(matches!(
            order_by_name.unwrap_err(),
            Error::InvalidAccess(_)
        ));

        let order_by_id = sort_by_column_id(&mut table, 999);
        assert!(matches!(order_by_id.unwrap_err(), Error::InvalidAccess(_)));
    }

    #[test]
    fn test_delete_cell_by_numb() {
        let mut table = setup_table(None);
        delete_cell_by_number(&mut table, 1, 2).unwrap();

        // The addressed cell is deleted ...
        assert_eq!(cell_text(&table, 1, 2).as_deref(), Some("DELETED"));
        // ... while another cell of the same column still holds a different string ...
        assert_ne!(cell_text(&table, 1, 0).unwrap(), "DELETED");
        // ... and the neighbouring column still holds a non-string value.
        assert_eq!(cell_text(&table, 0, 1), None);
    }

    #[test]
    fn test_delete_cell_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_cell_by_column_name_and_row_number(&mut table, "Surface [mm²]", 1).unwrap();

        // Only the addressed cell is deleted; the checked neighbours remain non-string values.
        assert_eq!(cell_text(&table, 1, 1).as_deref(), Some("DELETED"));
        assert_eq!(cell_text(&table, 1, 3), None);
        assert_eq!(cell_text(&table, 0, 1), None);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc