• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

clintval / vartovcf / 9800945742

04 Jul 2024 11:45PM UTC coverage: 90.476% (-4.6%) from 95.111%
9800945742

push

github

web-flow
chore: update dependencies and linting (#83)

* chore: update dependencies and linting

* chore: remove dead code

5 of 6 new or added lines in 2 files covered. (83.33%)

4 existing lines in 2 files now uncovered.

228 of 252 relevant lines covered (90.48%)

1.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.41
/src/lib/mod.rs
1
//! A library for working with VarDict/VarDictJava output.
2
#![warn(missing_docs)]
3

4
use anyhow::Result;
5
use csv::ReaderBuilder;
6
use log::*;
7
use rust_htslib::bcf::Format;
8
use rust_htslib::bcf::Writer as VcfWriter;
9
use std::collections::HashSet;
10
use std::error;
11
use std::fmt::Debug;
12
use std::io::Read;
13
use std::path::{Path, PathBuf};
14
use strum::Display;
15
use strum::{EnumString, VariantNames};
16

17
use crate::fai::{fasta_contigs_to_vcf_header, fasta_path_to_vcf_header};
18
use crate::io::has_gzip_ext;
19
use crate::progress_logger::{ProgressLogger, RecordLogger, DEFAULT_LOG_EVERY};
20
use crate::record::tumor_only_header;
21
use crate::record::TumorOnlyVariant;
22

23
pub mod fai;
24
pub mod io;
25
pub mod progress_logger;
26
pub mod record;
27

28
/// The default log level for the `vartovcf` tool.
29
pub const DEFAULT_LOG_LEVEL: &str = "info";
30

31
/// The valid structural variation (SV) type values for the `SVTYPE` FORMAT field.
32
pub const VALID_SV_TYPES: &[&str] = &["BND", "CNV", "DEL", "DUP", "INS", "INV"];
33

34
/// Namespace for path parts and extensions.
35
pub mod path {
36

37
    /// The Gzip extension.
38
    pub const GZIP_EXTENSION: &str = "gz";
39
}
40

41
// TODO: This could become unnecessary if we can dynamically determine the mode based on input.
42
/// The variant calling modes for VarDict/VarDictJava.
43
#[derive(Clone, Copy, Debug, Display, EnumString, VariantNames, PartialEq, PartialOrd)]
44
pub enum VarDictMode {
45
    /// The amplicon variant calling mode.
46
    //Amplicon,
47
    /// The tumor-normal variant calling mode.
48
    //TumorNormal,
49
    /// The tumor-only variant calling mode.
50
    TumorOnly,
51
}
52

53
/// Runs the tool `vartovcf` on an input VAR file and writes the records to an output VCF file.
54
///
55
/// # Arguments
56
///
57
/// * `input` - The input VAR file or stream
58
/// * `output` - The output VCF file or stream
59
/// * `fasta` - The reference sequence FASTA file, must be indexed
60
/// * `sample` - The sample name
61
/// * `mode` - The variant calling modes for VarDict/VarDictJava
62
///
63
/// # Returns
64
///
65
/// Returns the result of the execution with an integer exit code for success (0).
66
///
67
pub fn vartovcf<I, R>(
1✔
68
    input: I,
69
    output: Option<PathBuf>,
70
    fasta: R,
71
    sample: &str,
72
    mode: &VarDictMode,
73
) -> Result<i32, Box<dyn error::Error>>
74
where
75
    I: Read,
76
    R: AsRef<Path> + Debug,
77
{
78
    // TODO: Support paired tumor-normal input from VarDictJava.
79
    assert_eq!(
2✔
80
        mode,
×
81
        &VarDictMode::TumorOnly,
×
UNCOV
82
        "The only mode currently supported is [TumorOnly]."
×
83
    );
84

85
    // TODO: Implement a way to peek forward so we can parameterize the header based on the input.
86
    let mut header = tumor_only_header(sample);
1✔
87

88
    fasta_contigs_to_vcf_header(&fasta, &mut header);
1✔
89
    fasta_path_to_vcf_header(&fasta, &mut header).expect("Adding FASTA path to header failed!");
1✔
90

91
    let mut reader = ReaderBuilder::new()
3✔
92
        .delimiter(b'\t')
93
        .has_headers(false)
94
        .from_reader(input);
2✔
95

96
    let mut writer = match output {
2✔
97
        Some(output) => VcfWriter::from_path(&output, &header, !has_gzip_ext(&output), Format::Vcf),
1✔
NEW
98
        None => VcfWriter::from_stdout(&header, true, Format::Vcf),
×
99
    }
100
    .expect("Could not build a VCF writer!");
101

102
    let mut progress = ProgressLogger::new("processed", "variant records", DEFAULT_LOG_EVERY);
1✔
103
    let mut carry = csv::StringRecord::new();
1✔
104
    let mut variant = writer.empty_record();
2✔
105
    let mut seen: HashSet<String> = HashSet::new();
2✔
106

107
    while reader.read_record(&mut carry)? {
2✔
108
        if carry.iter().collect::<Vec<&str>>()[5].is_empty() {
2✔
109
            continue; // If the 5th field is empty, it's a record we need to avoid deserializing.
110
        }
111

112
        let var: TumorOnlyVariant = carry
1✔
113
            .deserialize(None)
1✔
114
            .expect("Could not deserialize record!");
115

116
        let key = format!(
5✔
117
            "{}-{}-{}-{}",
118
            var.contig, var.start, var.ref_allele, var.alt_allele
×
119
        );
120
        if !seen.insert(key) {
1✔
121
            continue; // Skip this record if we have seen this variant before.
122
        }
123

124
        if var.sample != sample {
1✔
125
            let message = format!("Expected sample '{}' found '{}'!", sample, var.sample);
2✔
126
            progress.emit()?;
2✔
127
            return Err(message.into());
2✔
128
        };
129

130
        let rid = writer.header().name2rid(var.contig.as_bytes()).unwrap();
3✔
131

132
        variant.set_rid(Some(rid));
1✔
133
        variant.set_pos(var.start as i64 - 1);
1✔
134
        variant.set_alleles(&[
1✔
135
            var.ref_allele.as_bytes(),
1✔
136
            var.alt_allele_for_vcf().as_bytes(),
1✔
137
        ])?;
138

139
        if var.alt_depth == 0 {
1✔
140
            variant.set_qual(0.0)
×
141
        } else {
142
            let qual = (var.alt_depth as f32).ln() / 2.0_f32.ln() * var.base_quality_mean;
2✔
143
            variant.set_qual(qual)
1✔
144
        }
145

146
        variant.push_info_float(b"ADJAF", &[var.af_adjusted])?;
2✔
147
        variant.push_info_float(b"BASEQUALMEAN", &[var.base_quality_mean])?;
2✔
148
        variant.push_info_float(b"BASEQUALSTDEV", &[var.stdev_base_stdev])?;
2✔
149
        variant.push_info_string(b"BIAS", &[var.strand_bias.to_string().as_bytes()])?;
2✔
150
        variant.push_info_integer(b"BIASALT", &[var.alt_forward, var.alt_reverse])?;
1✔
151
        variant.push_info_integer(b"BIASREF", &[var.ref_forward, var.ref_reverse])?;
2✔
152

153
        if let Some(duplication_rate) = var.duplication_rate {
2✔
154
            variant.push_info_float(b"DUPRATE", &[duplication_rate])?;
2✔
155
        } else {
156
            // NB: Without clearing the fields, you'll end up with stale references.
157
            variant.clear_info_float(b"DUPRATE")?;
2✔
158
        }
159

160
        variant.push_info_integer(b"END", &[var.end as i32])?;
2✔
161
        variant.push_info_float(b"HIAF", &[var.af_high_quality_bases])?;
2✔
162
        variant.push_info_integer(b"HICNT", &[var.high_quality_variant_reads])?;
2✔
163
        variant.push_info_integer(b"HICOV", &[var.high_quality_total_reads])?;
2✔
164
        variant.push_info_float(b"MEANMAPQ", &[var.mean_mapping_quality])?;
2✔
165
        variant.push_info_integer(b"MSI", &[var.microsatellite])?;
2✔
166
        variant.push_info_integer(b"MSILEN", &[var.microsatellite_length])?;
2✔
167
        variant.push_info_float(b"NM", &[var.mean_mismatches_in_reads])?;
2✔
168
        variant.push_info_float(b"POSMEAN", &[var.mean_position_in_read])?;
2✔
169
        variant.push_info_float(b"POSSTDEV", &[var.stdev_position_in_read])?;
2✔
170
        variant.push_info_integer(b"SHIFT3", &[var.num_bases_3_prime_shift_for_deletions])?;
2✔
171
        variant.push_info_integer(b"SN", &[var.signal_to_noise])?;
2✔
172

173
        if let Some(sv_info) = &var.sv_info {
2✔
174
            variant.push_info_integer(b"SPLITREAD", &[sv_info.supporting_split_reads])?;
2✔
175
            variant.push_info_integer(b"SPANPAIR", &[sv_info.supporting_pairs])?;
2✔
176
        } else {
177
            // NB: Without clearing the fields, you'll end up with stale references.
178
            variant.clear_info_integer(b"SPLITREAD")?;
2✔
179
            variant.clear_info_integer(b"SPANPAIR")?;
2✔
180
        }
181

182
        variant.push_info_float(b"STRANDBIASODDRATIO", &[var.strand_bias_odds_ratio])?;
2✔
183
        variant.push_info_float(b"STRANDBIASPVALUE", &[var.strand_bias_p_value])?;
2✔
184

185
        if VALID_SV_TYPES.contains(&var.variant_type) {
2✔
186
            variant.push_info_integer(b"SVLEN", &[var.length()])?;
2✔
187
            variant.push_info_string(b"SVTYPE", &[var.variant_type.as_bytes()])?;
2✔
188
        } else {
189
            // NB: Without clearing the fields, you'll end up with stale references.
190
            variant.clear_info_integer(b"SVLEN")?;
2✔
191
            variant.clear_info_integer(b"SVTYPE")?;
2✔
192
        }
193

194
        variant.push_genotypes(var.gt_value(0.25))?;
2✔
195
        variant.push_format_integer(b"AD", &var.ad_value())?;
2✔
196
        variant.push_format_integer(b"DP", &[var.depth])?;
1✔
197
        variant.push_format_integer(b"VD", &[var.alt_depth])?;
2✔
198

199
        writer.write(&variant)?;
2✔
200
        progress.observe()?;
2✔
201
    }
202

203
    progress.emit()?;
3✔
204

205
    Ok(0)
1✔
206
}
207

208
#[cfg(test)]
209
mod tests {
210
    use std::fs::File;
211
    use std::io::BufReader;
212
    use std::path::PathBuf;
213

214
    use anyhow::Result;
215
    use file_diff::diff;
216
    use pretty_assertions::assert_eq;
217
    use tempfile::NamedTempFile;
218

219
    use super::VarDictMode::TumorOnly;
220
    use super::*;
221

222
    #[test]
223
    fn test_vartovcf_run() -> Result<(), Box<dyn std::error::Error>> {
224
        let sample = "dna00001";
225
        let input = BufReader::new(File::open("tests/calls.var")?);
226
        let output = NamedTempFile::new().expect("Cannot create temporary file!");
227
        let reference = PathBuf::from("tests/reference.fa");
228
        let exit = vartovcf(
229
            input,
230
            Some(output.path().into()),
231
            &reference,
232
            &sample,
233
            &TumorOnly,
234
        )?;
235
        assert_eq!(exit, 0);
236
        assert!(diff(&output.path().to_str().unwrap(), "tests/calls.vcf"));
237
        Ok(())
238
    }
239

240
    #[test]
241
    fn test_when_incorrect_sample() {
242
        let sample = "XXXXXXXX";
243
        let input = BufReader::new(File::open("tests/calls.var").unwrap());
244
        let output = NamedTempFile::new().expect("Cannot create temporary file!");
245
        let reference = PathBuf::from("tests/reference.fa");
246
        let result = vartovcf(
247
            input,
248
            Some(output.path().into()),
249
            &reference,
250
            &sample,
251
            &TumorOnly,
252
        );
253
        assert!(result.is_err());
254
    }
255
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc