• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

VolumeGraphics / havocompare / 8616fda4031ceaa82074873aa6e2de692a884ce1

pending completion
8616fda4031ceaa82074873aa6e2de692a884ce1

push

github

GitHub
Merge pull request #28 from VolumeGraphics/delete-csv-cell

2530 of 2744 relevant lines covered (92.2%)

2264.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.32
/src/csv/preprocessing.rs
1
use crate::csv;
2
use crate::csv::value::Value;
3
use crate::csv::Table;
4
use schemars_derive::JsonSchema;
5
use serde::{Deserialize, Serialize};
6
use std::cmp::Ordering::Equal;
7
use tracing::{debug, warn};
8

9
#[derive(JsonSchema, Deserialize, Serialize, Debug)]
2✔
10
/// Preprocessor options
11
pub enum Preprocessor {
12
    /// Try to extract the headers from the first row - fallible if first row contains a number
13
    ExtractHeaders,
14
    /// Replace all fields in column by number by a deleted marker
15
    DeleteColumnByNumber(usize),
16
    /// Replace all fields in column by name by a deleted marker
17
    DeleteColumnByName(String),
18
    /// Sort rows by column with given name. Fails if no headers were extracted or column name is not found, or if any row has no numbers there
19
    SortByColumnName(String),
20
    /// Sort rows by column with given number. Fails if any row has no numbers there or if out of bounds.
21
    SortByColumnNumber(usize),
22
    /// Replace all fields in row with given number by a deleted marker
23
    DeleteRowByNumber(usize),
24
    /// Replace all fields in row  where at least a single field matches regex by a deleted marker
25
    DeleteRowByRegex(String),
26
    /// replace found cell using row and columnd index by a deleted marker
27
    DeleteCellByNumber {
28
        /// column number
29
        column: usize,
30
        /// row number
31
        row: usize,
32
    },
33
    /// replace found cell using column header and row index by a deleted marker
34
    DeleteCellByName {
35
        /// column with given name
36
        column: String,
37
        /// row number
38
        row: usize,
39
    },
40
}
41

42
impl Preprocessor {
43
    pub(crate) fn process(&self, table: &mut Table) -> Result<(), csv::Error> {
72✔
44
        match self {
72✔
45
            Preprocessor::ExtractHeaders => extract_headers(table),
72✔
46
            Preprocessor::DeleteColumnByNumber(id) => delete_column_number(table, *id),
×
47
            Preprocessor::DeleteColumnByName(name) => delete_column_name(table, name.as_str()),
×
48
            Preprocessor::SortByColumnName(name) => sort_by_column_name(table, name.as_str()),
×
49
            Preprocessor::SortByColumnNumber(id) => sort_by_column_id(table, *id),
×
50
            Preprocessor::DeleteRowByNumber(id) => delete_row_by_number(table, *id),
×
51
            Preprocessor::DeleteRowByRegex(regex) => delete_row_by_regex(table, regex),
×
52
            Preprocessor::DeleteCellByNumber { column, row } => {
×
53
                delete_cell_by_number(table, *column, *row)
×
54
            }
55
            Preprocessor::DeleteCellByName { column, row } => {
×
56
                delete_cell_by_column_name_and_row_number(table, column, *row)
×
57
            }
58
        }
59
    }
72✔
60
}
61

62
fn delete_row_by_regex(table: &mut Table, regex: &str) -> Result<(), csv::Error> {
1✔
63
    let regex = regex::Regex::new(regex)?;
1✔
64
    table
1✔
65
        .rows_mut()
1✔
66
        .filter(|row| row.iter().any(|v| regex.is_match(v.to_string().as_str())))
1,027✔
67
        .for_each(|mut row| row.iter_mut().for_each(|v| **v = Value::deleted()));
2✔
68
    Ok(())
1✔
69
}
1✔
70

71
fn delete_row_by_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
72
    if let Some(mut v) = table.rows_mut().nth(id) {
1✔
73
        v.iter_mut().for_each(|v| **v = Value::deleted())
2✔
74
    }
×
75
    Ok(())
1✔
76
}
1✔
77

78
fn delete_cell_by_number(table: &mut Table, column: usize, row: usize) -> Result<(), csv::Error> {
1✔
79
    let value = table
1✔
80
        .columns
1✔
81
        .get_mut(column)
1✔
82
        .ok_or_else(|| {
1✔
83
            csv::Error::InvalidAccess(format!("Cell with column number {} not found.", column))
×
84
        })?
1✔
85
        .rows
86
        .get_mut(row)
1✔
87
        .ok_or_else(|| {
1✔
88
            csv::Error::InvalidAccess(format!("Cell with row number {} not found.", row))
×
89
        })?;
1✔
90

91
    *value = Value::deleted();
1✔
92

1✔
93
    Ok(())
1✔
94
}
1✔
95

96
fn delete_cell_by_column_name_and_row_number(
1✔
97
    table: &mut Table,
1✔
98
    column: &str,
1✔
99
    row: usize,
1✔
100
) -> Result<(), csv::Error> {
1✔
101
    let value = table
1✔
102
        .columns
1✔
103
        .iter_mut()
1✔
104
        .find(|col| col.header.as_deref().unwrap_or_default() == column)
2✔
105
        .ok_or_else(|| {
1✔
106
            csv::Error::InvalidAccess(format!("Cell with column name '{}' not found.", column))
×
107
        })?
1✔
108
        .rows
109
        .get_mut(row)
1✔
110
        .ok_or_else(|| {
1✔
111
            csv::Error::InvalidAccess(format!("Cell with row number {} not found.", row))
×
112
        })?;
1✔
113

114
    *value = Value::deleted();
1✔
115

1✔
116
    Ok(())
1✔
117
}
1✔
118

119
fn get_permutation(rows_to_sort_by: &Vec<f64>) -> permutation::Permutation {
2✔
120
    permutation::sort_by(rows_to_sort_by, |a, b| b.partial_cmp(a).unwrap_or(Equal))
4,228✔
121
}
2✔
122

123
/// Reorders the rows of every column of `table` in place according to
/// `permutation`, so that all columns are rearranged the same way.
fn apply_permutation(table: &mut Table, mut permutation: permutation::Permutation) {
    for column in table.columns.iter_mut() {
        permutation.apply_slice_in_place(&mut column.rows);
    }
}
2✔
128

129
fn sort_by_column_id(table: &mut Table, id: usize) -> Result<(), csv::Error> {
3✔
130
    let sort_master_col = table.columns.get(id).ok_or_else(|| {
3✔
131
        csv::Error::InvalidAccess(format!(
1✔
132
            "Column number sorting by id {id} requested but column not found."
1✔
133
        ))
1✔
134
    })?;
3✔
135
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
136
        .rows
2✔
137
        .iter()
2✔
138
        .map(|v| {
515✔
139
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
140
                csv::Error::UnexpectedValue(
1✔
141
                    v.clone(),
1✔
142
                    "Expected quantity while trying to sort by column id".to_string(),
1✔
143
                )
1✔
144
            })
515✔
145
        })
515✔
146
        .collect();
2✔
147
    let permutation = get_permutation(&col_floats?);
2✔
148
    apply_permutation(table, permutation);
1✔
149
    Ok(())
1✔
150
}
3✔
151

152
fn sort_by_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
3✔
153
    let sort_master_col = table
3✔
154
        .columns
3✔
155
        .iter()
3✔
156
        .find(|c| c.header.as_deref().unwrap_or_default() == name)
5✔
157
        .ok_or_else(|| {
3✔
158
            csv::Error::InvalidAccess(format!(
1✔
159
                "Requested format sorting by column'{name}' but column not found."
1✔
160
            ))
1✔
161
        })?;
3✔
162
    let col_floats: Result<Vec<_>, csv::Error> = sort_master_col
2✔
163
        .rows
2✔
164
        .iter()
2✔
165
        .map(|v| {
515✔
166
            v.get_quantity().map(|q| q.value).ok_or_else(|| {
515✔
167
                csv::Error::UnexpectedValue(
1✔
168
                    v.clone(),
1✔
169
                    "Expected quantity while trying to sort by column name".to_string(),
1✔
170
                )
1✔
171
            })
515✔
172
        })
515✔
173
        .collect();
2✔
174
    let permutation = get_permutation(&col_floats?);
2✔
175
    apply_permutation(table, permutation);
1✔
176
    Ok(())
1✔
177
}
3✔
178

179
/// Calls `delete_contents` on the first column whose header equals `name`.
///
/// A column without a header is compared as if its header were the empty
/// string. When no column matches, nothing happens and `Ok(())` is still
/// returned.
fn delete_column_name(table: &mut Table, name: &str) -> Result<(), csv::Error> {
    let matching_column = table
        .columns
        .iter_mut()
        .find(|candidate| candidate.header.as_deref().unwrap_or_default() == name);
    if let Some(column) = matching_column {
        column.delete_contents();
    }
    Ok(())
}
1✔
189

190
/// Calls `delete_contents` on the column at index `id`.
///
/// An index past the last column is ignored: the table is left untouched and
/// `Ok(())` is returned all the same.
fn delete_column_number(table: &mut Table, id: usize) -> Result<(), csv::Error> {
    let selected_column = table.columns.get_mut(id);
    if let Some(column) = selected_column {
        column.delete_contents();
    }
    Ok(())
}
1✔
196

197
fn extract_headers(table: &mut Table) -> Result<(), csv::Error> {
78✔
198
    debug!("Extracting headers...");
78✔
199
    let can_extract = table
78✔
200
        .columns
78✔
201
        .iter()
78✔
202
        .all(|c| matches!(c.rows.first(), Some(Value::String(_))));
188✔
203
    if !can_extract {
78✔
204
        warn!("Cannot extract header for this csv!");
×
205
        return Ok(());
×
206
    }
78✔
207

208
    for col in table.columns.iter_mut() {
188✔
209
        let title = col.rows.drain(0..1).next().ok_or_else(|| {
188✔
210
            csv::Error::InvalidAccess("Tried to extract header of empty column!".to_string())
×
211
        })?;
188✔
212
        if let Value::String(title) = title {
188✔
213
            col.header = Some(title);
188✔
214
        }
188✔
215
    }
216
    Ok(())
78✔
217
}
78✔
218

219
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv::{Column, Delimiters, Error};
    use std::fs::File;

    /// Text the tests expect for deleted fields and deleted column headers.
    const DELETED: &str = "DELETED";

    /// Loads the deviation histogram fixture; `None` selects the default delimiters.
    fn setup_table(delimiters: Option<Delimiters>) -> Table {
        let delimiters = delimiters.unwrap_or_default();
        let fixture = File::open("tests/csv/data/DeviationHistogram.csv").unwrap();
        Table::from_reader(fixture, &delimiters).unwrap()
    }

    /// Returns the header text of `column`, panicking if it has none.
    fn header_of(column: &Column) -> &str {
        column.header.as_deref().unwrap()
    }

    /// Returns the field at (`column`, `row`), panicking if it does not exist.
    fn cell(table: &Table, column: usize, row: usize) -> &Value {
        &table.columns[column].rows[row]
    }

    #[test]
    fn test_extract_headers() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        assert_eq!(header_of(table.columns.first().unwrap()), "Deviation [mm]");
        assert_eq!(header_of(table.columns.last().unwrap()), "Surface [mm²]");
    }

    #[test]
    fn test_delete_column_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_number(&mut table, 0).unwrap();

        let first_column = table.columns.first().unwrap();
        assert_eq!(header_of(first_column), DELETED);
        assert!(first_column
            .rows
            .iter()
            .all(|field| *field == csv::Value::deleted()));
    }

    #[test]
    fn test_delete_column_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_column_name(&mut table, "Surface [mm²]").unwrap();

        let last_column = table.columns.last().unwrap();
        assert_eq!(header_of(last_column), DELETED);
        assert!(last_column
            .rows
            .iter()
            .all(|field| *field == csv::Value::deleted()));
    }

    #[test]
    fn test_delete_row_by_id() {
        let mut table = setup_table(None);
        delete_row_by_number(&mut table, 0).unwrap();

        let text = cell(&table, 0, 0).get_string();
        assert_eq!(text.as_deref().unwrap(), DELETED);
    }

    #[test]
    fn test_delete_row_by_regex() {
        let mut table = setup_table(None);
        delete_row_by_regex(&mut table, "mm").unwrap();

        let text = cell(&table, 0, 0).get_string();
        assert_eq!(text.as_deref().unwrap(), DELETED);
    }

    #[test]
    fn test_sort_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        sort_by_column_name(&mut table, "Surface [mm²]").unwrap();

        // Every row must be at least as large as its successor in the sort column.
        let sorted_values: Vec<_> = table
            .rows()
            .map(|row| row.get(1).unwrap().get_quantity().unwrap().value)
            .collect();
        assert!(sorted_values.windows(2).all(|pair| pair[0] >= pair[1]));
    }

    #[test]
    fn test_sort_by_id() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        let column = 1;
        sort_by_column_id(&mut table, column).unwrap();

        // Every row must be at least as large as its successor in the sort column.
        let sorted_values: Vec<_> = table
            .rows()
            .map(|row| row.get(column).unwrap().get_quantity().unwrap().value)
            .collect();
        assert!(sorted_values.windows(2).all(|pair| pair[0] >= pair[1]));
    }

    #[test]
    fn sorting_by_mixed_column_fails() {
        // One numeric and one textual field: the column cannot serve as a sort key.
        let mut table = Table {
            columns: vec![Column {
                header: Some("Field".to_string()),
                rows: vec![
                    Value::from_str("1.0", &None),
                    Value::String("String-Value".to_string()),
                ],
            }],
        };

        assert!(matches!(
            sort_by_column_name(&mut table, "Field"),
            Err(Error::UnexpectedValue(_, _))
        ));
        assert!(matches!(
            sort_by_column_id(&mut table, 0),
            Err(Error::UnexpectedValue(_, _))
        ));
    }

    #[test]
    fn non_existing_table_fails() {
        let mut table = setup_table(None);

        assert!(matches!(
            sort_by_column_name(&mut table, "Non-Existing-Field"),
            Err(Error::InvalidAccess(_))
        ));
        assert!(matches!(
            sort_by_column_id(&mut table, 999),
            Err(Error::InvalidAccess(_))
        ));
    }

    #[test]
    fn test_delete_cell_by_numb() {
        let mut table = setup_table(None);
        delete_cell_by_number(&mut table, 1, 2).unwrap();

        // The addressed cell carries the marker ...
        let deleted_text = cell(&table, 1, 2).get_string();
        assert_eq!(deleted_text.as_deref().unwrap(), DELETED);

        // ... while the first field of the same column still holds other text ...
        let first_text = cell(&table, 1, 0).get_string();
        assert_ne!(first_text.as_deref().unwrap(), DELETED);

        // ... and a field in the other column holds no string at all.
        assert_eq!(cell(&table, 0, 1).get_string(), None);
    }

    #[test]
    fn test_delete_cell_by_name() {
        let mut table = setup_table(None);
        extract_headers(&mut table).unwrap();
        delete_cell_by_column_name_and_row_number(&mut table, "Surface [mm²]", 1).unwrap();

        // The addressed cell carries the marker ...
        let deleted_text = cell(&table, 1, 1).get_string();
        assert_eq!(deleted_text.as_deref().unwrap(), DELETED);

        // ... while other cells of that column and of the first column hold no string.
        assert_eq!(cell(&table, 1, 3).get_string(), None);
        assert_eq!(cell(&table, 0, 1).get_string(), None);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc