• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rust-bio / rust-htslib / 14738329579

29 Apr 2025 06:12PM UTC coverage: 83.282% (-0.3%) from 83.605%
14738329579

Pull #473

github

web-flow
Merge 61ffae407 into 8741513e4
Pull Request #473: feat: Adding `from_hashmap` for bam Header

0 of 9 new or added lines in 1 file covered. (0.0%)

3 existing lines in 2 files now uncovered.

2720 of 3266 relevant lines covered (83.28%)

17801.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.91
/src/bam/header.rs
1
// Copyright 2014 Johannes Köster.
2
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3
// This file may not be copied, modified, or distributed
4
// except according to those terms.
5

6
use crate::bam::HeaderView;
7
use lazy_static::lazy_static;
8
use linear_map::LinearMap;
9
use regex::Regex;
10
use std::borrow::Cow;
11
use std::collections::HashMap;
12

13
/// A BAM header.
14
#[derive(Debug, Clone)]
15
pub struct Header {
16
    records: Vec<Vec<u8>>,
17
}
18

19
impl Default for Header {
20
    fn default() -> Self {
×
21
        Self::new()
×
22
    }
23
}
24

25
impl Header {
26
    /// Create a new header.
27
    pub fn new() -> Self {
15✔
28
        Header {
29
            records: Vec::new(),
15✔
30
        }
31
    }
32

33
    pub fn from_template(header: &HeaderView) -> Self {
12✔
34
        let mut record = header.as_bytes().to_owned();
12✔
35
        // Strip off any trailing newline character.
36
        // Otherwise there could be a blank line in the
37
        // header which samtools (<=1.6) will complain
38
        // about
39
        while let Some(&last_char) = record.last() {
42✔
40
            if last_char == b'\n' {
12✔
41
                record.pop();
12✔
42
            } else {
43
                break;
9✔
44
            }
45
        }
46
        Header {
47
            records: vec![record],
15✔
48
        }
49
    }
50

51
    /// Creates a new Header from a HashMap. Useful if you have used `to_hashmap` and manipulated that hashmap to create a new header.
NEW
52
    pub fn from_hashmap(hashmap: HashMap<String, Vec<LinearMap<String, String>>>) -> Self {
×
NEW
53
        let mut header = Header::new();
×
NEW
54
        for (key, values) in hashmap.iter() {
×
NEW
55
            for value in values {
×
NEW
56
                let mut record = HeaderRecord::new(key.as_bytes());
×
NEW
57
                for (tag, val) in value.iter() {
×
NEW
58
                    record.push_tag(tag.as_bytes(), val);
×
59
                }
NEW
60
                header.push_record(&record);
×
61
            }
62
        }
NEW
63
        header
×
64
    }
65

66
    /// Add a record to the header.
67
    pub fn push_record(&mut self, record: &HeaderRecord<'_>) -> &mut Self {
11✔
68
        self.records.push(record.to_bytes());
11✔
69
        self
10✔
70
    }
71

72
    /// Add a comment to the header.
73
    pub fn push_comment(&mut self, comment: &[u8]) -> &mut Self {
×
74
        self.records.push([&b"@CO"[..], comment].join(&b'\t'));
×
75
        self
76
    }
77

78
    pub fn to_bytes(&self) -> Vec<u8> {
26✔
79
        self.records.join(&b'\n')
26✔
80
    }
81

82
    /// This returns a header as a HashMap.
83
    /// Comment lines starting with "@CO" will NOT be included in the HashMap.
84
    /// Comment lines can be obtained by the `comments` function.
85
    pub fn to_hashmap(&self) -> HashMap<String, Vec<LinearMap<String, String>>> {
4✔
86
        let mut header_map = HashMap::default();
4✔
87

88
        lazy_static! {
4✔
89
            static ref REC_TYPE_RE: Regex = Regex::new(r"@([A-Z][A-Z])").unwrap();
2✔
90
            static ref TAG_RE: Regex = Regex::new(r"([A-Za-z][A-Za-z0-9]):([ -~]*)").unwrap();
2✔
91
        }
92

93
        let header_string = String::from_utf8(self.to_bytes()).unwrap();
6✔
94

95
        for line in header_string.split('\n').filter(|x| !x.is_empty()) {
26✔
96
            let parts: Vec<_> = line.split('\t').filter(|x| !x.is_empty()).collect();
33✔
97
            // assert!(rec_type_re.is_match(parts[0]));
98
            let record_type = REC_TYPE_RE
6✔
99
                .captures(parts[0])
2✔
100
                .unwrap()
101
                .get(1)
102
                .unwrap()
103
                .as_str()
104
                .to_owned();
105
            if record_type.eq("CO") {
2✔
106
                continue;
107
            }
108
            let mut field = LinearMap::default();
10✔
109
            for part in parts.iter().skip(1) {
29✔
110
                let cap = TAG_RE.captures(part).unwrap();
4✔
111
                let tag = cap.get(1).unwrap().as_str().to_owned();
4✔
112
                let value = cap.get(2).unwrap().as_str().to_owned();
4✔
113
                field.insert(tag, value);
2✔
114
            }
115
            header_map
12✔
116
                .entry(record_type)
10✔
117
                .or_insert_with(Vec::new)
8✔
118
                .push(field);
10✔
119
        }
120
        header_map
4✔
121
    }
122

123
    /// Returns an iterator of comment lines.
124
    pub fn comments(&self) -> impl Iterator<Item = Cow<str>> {
×
125
        self.records.iter().flat_map(|r| {
×
126
            r.split(|x| x == &b'\n')
×
127
                .filter(|x| x.starts_with(b"@CO\t"))
×
128
                .map(|x| String::from_utf8_lossy(&x[4..]))
×
129
        })
130
    }
131
}
132

133
/// Header record.
///
/// A single header line under construction: a record type (stored with its
/// leading `@`) plus an ordered list of tags. Tag names are borrowed for `'a`;
/// tag values are stored as owned bytes.
#[derive(Debug, Clone)]
pub struct HeaderRecord<'a> {
    rec_type: Vec<u8>,
    tags: Vec<(&'a [u8], Vec<u8>)>,
}

impl<'a> HeaderRecord<'a> {
    /// Create a new header record.
    /// See SAM format specification for possible record types.
    ///
    /// # Arguments
    ///
    /// * `rec_type` - the record type without the leading `@`, e.g. `b"SQ"`.
    ///   The bytes are copied, so the slice only has to live for the duration
    ///   of this call.
    pub fn new(rec_type: &[u8]) -> Self {
        HeaderRecord {
            rec_type: [&b"@"[..], rec_type].concat(),
            tags: Vec::new(),
        }
    }

    /// Add a new tag to the record.
    ///
    /// Tags are serialized in the order in which they were pushed. Returns
    /// `self` so that calls can be chained.
    ///
    /// # Arguments
    ///
    /// * `tag` - the tag identifier
    /// * `value` - the value. Can be any type convertible into a string. Preferably numbers or
    ///   strings.
    pub fn push_tag<V: ToString>(&mut self, tag: &'a [u8], value: V) -> &mut Self {
        self.tags.push((tag, value.to_string().into_bytes()));
        self
    }

    /// Serialize the record as `@TYPE` followed by one tab-prefixed
    /// `TAG:VALUE` pair per tag, without a trailing newline.
    fn to_bytes(&self) -> Vec<u8> {
        // Every tag adds a tab, the tag name, a colon and the value.
        let tags_len: usize = self
            .tags
            .iter()
            .map(|(tag, value)| tag.len() + value.len() + 2)
            .sum();
        let mut out = Vec::with_capacity(self.rec_type.len() + tags_len);
        out.extend_from_slice(&self.rec_type);
        for (tag, value) in &self.tags {
            out.push(b'\t');
            out.extend_from_slice(tag);
            out.push(b':');
            out.extend_from_slice(value);
        }
        out
    }
}
174

175
#[cfg(test)]
mod tests {
    use super::{Header, HeaderRecord};
    use linear_map::LinearMap;
    use std::collections::HashMap;

    /// `push_tag` accepts integers, `&str`, `&String` and `String` values and
    /// serializes the tags in the order they were pushed.
    #[test]
    fn test_push_tag() {
        let mut record = HeaderRecord::new(b"HD");
        record.push_tag(b"X1", 0);
        record.push_tag(b"X2", 0);

        let x = "x".to_string();
        record.push_tag(b"X3", x.as_str());
        record.push_tag(b"X4", &x);
        record.push_tag(b"X5", x);

        assert_eq!(record.to_bytes(), b"@HD\tX1:0\tX2:0\tX3:x\tX4:x\tX5:x");
    }

    /// A header built by `from_hashmap` keeps the order of the records of one
    /// type as well as the order of the tags inside each record, and parses
    /// back through `to_hashmap`.
    #[test]
    fn test_from_hashmap() {
        let mut chr1 = LinearMap::new();
        chr1.insert("SN".to_string(), "chr1".to_string());
        chr1.insert("LN".to_string(), "100".to_string());
        let mut chr2 = LinearMap::new();
        chr2.insert("SN".to_string(), "chr2".to_string());
        chr2.insert("LN".to_string(), "200".to_string());

        // A single record type keeps the expected bytes independent of the
        // iteration order of the HashMap.
        let mut map = HashMap::new();
        map.insert("SQ".to_string(), vec![chr1, chr2]);

        let header = Header::from_hashmap(map);
        assert_eq!(
            header.to_bytes(),
            &b"@SQ\tSN:chr1\tLN:100\n@SQ\tSN:chr2\tLN:200"[..]
        );

        let round_trip = header.to_hashmap();
        assert_eq!(round_trip.len(), 1);
        let sequences = &round_trip["SQ"];
        assert_eq!(sequences.len(), 2);
        assert_eq!(sequences[0].get("SN").map(String::as_str), Some("chr1"));
        assert_eq!(sequences[0].get("LN").map(String::as_str), Some("100"));
        assert_eq!(sequences[1].get("SN").map(String::as_str), Some("chr2"));
        assert_eq!(sequences[1].get("LN").map(String::as_str), Some("200"));
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc