• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

actioninja / refpack-rs / 6602624440

22 Oct 2023 08:13AM UTC coverage: 91.945% (-0.09%) from 92.031%
6602624440

push

github

actioninja
chore: remove dupe rustfmt

1073 of 1167 relevant lines covered (91.95%)

155815.83 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.14
/src/data/compression.rs
1
////////////////////////////////////////////////////////////////////////////////
2
// This Source Code Form is subject to the terms of the Mozilla Public         /
3
// License, v. 2.0. If a copy of the MPL was not distributed with this         /
4
// file, You can obtain one at https://mozilla.org/MPL/2.0/.                   /
5
//                                                                             /
6
////////////////////////////////////////////////////////////////////////////////
7

8
//! Compression algorithms, helpers, and compression encoding
9
use std::cmp::max;
10
use std::collections::HashMap;
11
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
12

13
use crate::data::control::{Command, Control, Mode};
14
use crate::format::Format;
15
use crate::header::mode::Mode as HeaderMode;
16
use crate::header::Header;
17
use crate::{RefPackError, RefPackResult};
18

19
// Optimization trick from libflate_lz77
20
// Faster lookups for very large tables
21
#[derive(Debug)]
×
22
enum PrefixTable {
23
    Small(HashMap<[u8; 3], u32>),
24
    Large(LargePrefixTable),
25
}
26

27
impl PrefixTable {
28
    fn new<M: Mode>(bytes: usize) -> Self {
101,298✔
29
        if bytes < M::SIZES.long_offset_max() as usize {
101,298✔
30
            PrefixTable::Small(HashMap::new())
101,298✔
31
        } else {
32
            PrefixTable::Large(LargePrefixTable::new())
×
33
        }
34
    }
101,298✔
35

36
    fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
5,730,265✔
37
        match *self {
5,730,265✔
38
            PrefixTable::Small(ref mut table) => table.insert(prefix, position),
5,730,265✔
39
            PrefixTable::Large(ref mut table) => table.insert(prefix, position),
×
40
        }
41
    }
5,730,265✔
42
}
43

44
#[derive(Debug)]
×
45
struct LargePrefixTable {
46
    table: Vec<Vec<(u8, u32)>>,
47
}
48

49
impl LargePrefixTable {
50
    fn new() -> Self {
×
51
        LargePrefixTable {
×
52
            table: (0..=0xFFFF).map(|_| Vec::new()).collect(),
×
53
        }
×
54
    }
×
55

56
    fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
×
57
        let p0 = prefix[0] as usize;
×
58
        let p1 = prefix[1] as usize;
×
59
        let p2 = prefix[2];
×
60

×
61
        let index = (p0 << 8) | p1;
×
62
        let positions = &mut self.table[index];
×
63
        for &mut (key, ref mut value) in positions.iter_mut() {
×
64
            if key == p2 {
×
65
                let old = *value;
×
66
                *value = position;
×
67
                return Some(old);
×
68
            }
×
69
        }
70
        positions.push((p2, position));
×
71
        None
×
72
    }
×
73
}
74

75
fn prefix(input_buf: &[u8]) -> [u8; 3] {
5,730,265✔
76
    let buf: &[u8] = &input_buf[..3];
5,730,265✔
77
    [buf[0], buf[1], buf[2]]
5,730,265✔
78
}
5,730,265✔
79

80
/// Reads from an incoming `Read` reader and compresses and encodes to
81
/// `Vec<Control>`
82
pub(crate) fn encode_stream<F: Format>(
101,298✔
83
    reader: &mut (impl Read + Seek),
101,298✔
84
    length: usize,
101,298✔
85
) -> Result<Vec<Control>, RefPackError> {
101,298✔
86
    let sizes = F::ControlMode::SIZES;
101,298✔
87

101,298✔
88
    let mut in_buffer = vec![0_u8; length];
101,298✔
89
    reader.read_exact(&mut in_buffer)?;
101,298✔
90
    let mut controls: Vec<Control> = vec![];
101,298✔
91
    let mut prefix_table = PrefixTable::new::<F::ControlMode>(in_buffer.len());
101,298✔
92

101,298✔
93
    let mut i = 0;
101,298✔
94
    let end = max(3, in_buffer.len()) - 3;
101,298✔
95
    let mut literal_block: Vec<u8> = Vec::with_capacity(sizes.literal_max() as usize);
101,298✔
96
    while i < end {
5,831,477✔
97
        let key = prefix(&in_buffer[i..]);
5,730,179✔
98

5,730,179✔
99
        // get the position of the prefix in the table (if it exists)
5,730,179✔
100
        let matched = prefix_table.insert(key, i as u32);
5,730,179✔
101

5,730,179✔
102
        let pair = matched.map(|x| x as usize).and_then(|matched| {
5,730,179✔
103
            let distance = i - matched;
51✔
104
            if distance > sizes.long_offset_max() as usize
51✔
105
                || distance < sizes.short_offset_min() as usize
51✔
106
            {
107
                None
×
108
            } else {
109
                // find the longest common prefix
110
                let max_copy_len = sizes.long_length_max() as usize;
51✔
111
                let match_length = in_buffer[i..]
51✔
112
                    .iter()
51✔
113
                    .take(max_copy_len - 3)
51✔
114
                    .zip(&in_buffer[matched..])
51✔
115
                    .take_while(|(a, b)| a == b)
204✔
116
                    .count();
51✔
117

51✔
118
                // Insufficient similarity for given distance, reject
51✔
119
                if (match_length <= sizes.medium_length_min() as usize
51✔
120
                    && distance > sizes.short_offset_max() as usize)
51✔
121
                    || (match_length <= sizes.long_length_min() as usize
43✔
122
                        && distance > sizes.medium_offset_max() as usize)
43✔
123
                {
124
                    None
8✔
125
                } else {
126
                    Some((matched, match_length))
43✔
127
                }
128
            }
129
        });
5,730,179✔
130

131
        if let Some((found, match_length)) = pair {
5,730,179✔
132
            let distance = i - found;
43✔
133

43✔
134
            // If the current literal block is longer than the copy limit we need to split
43✔
135
            // the block
43✔
136
            if literal_block.len() > sizes.copy_literal_max() as usize {
43✔
137
                let split_point: usize = literal_block.len() - (literal_block.len() % 4);
41✔
138
                controls.push(Control::new_literal_block::<F::ControlMode>(
41✔
139
                    &literal_block[..split_point],
41✔
140
                ));
41✔
141
                let second_block = &literal_block[split_point..];
41✔
142
                controls.push(Control::new(
41✔
143
                    Command::new::<F::ControlMode>(distance, match_length, second_block.len()),
41✔
144
                    second_block.to_vec(),
41✔
145
                ));
41✔
146
            } else {
41✔
147
                // If it's not, just push a new block directly
2✔
148
                controls.push(Control::new(
2✔
149
                    Command::new::<F::ControlMode>(distance, match_length, literal_block.len()),
2✔
150
                    literal_block.clone(),
2✔
151
                ));
2✔
152
            }
2✔
153
            literal_block.clear();
43✔
154

155
            for k in (i..).take(match_length).skip(1) {
86✔
156
                if k >= end {
86✔
157
                    break;
×
158
                }
86✔
159
                prefix_table.insert(prefix(&in_buffer[k..]), k as u32);
86✔
160
            }
161

162
            i += match_length;
43✔
163
        } else {
164
            literal_block.push(in_buffer[i]);
5,730,136✔
165
            i += 1;
5,730,136✔
166
            // If it's reached the limit, push the block immediately and clear the running
5,730,136✔
167
            // block
5,730,136✔
168
            if literal_block.len() >= (sizes.literal_max() as usize) {
5,730,136✔
169
                controls.push(Control::new_literal_block::<F::ControlMode>(&literal_block));
8,430✔
170
                literal_block.clear();
8,430✔
171
            }
5,721,706✔
172
        }
173
    }
174
    // Add remaining literals if there are any
175
    if i < in_buffer.len() {
101,298✔
176
        literal_block.extend_from_slice(&in_buffer[i..]);
101,298✔
177
    }
101,298✔
178
    // Extremely similar to block up above, but with a different control type
179
    if literal_block.len() > 3 {
101,298✔
180
        let split_point: usize = literal_block.len() - (literal_block.len() % 4);
98,308✔
181
        controls.push(Control::new_literal_block::<F::ControlMode>(
98,308✔
182
            &literal_block[..split_point],
98,308✔
183
        ));
98,308✔
184
        controls.push(Control::new_stop::<F::ControlMode>(
98,308✔
185
            &literal_block[split_point..],
98,308✔
186
        ));
98,308✔
187
    } else {
98,308✔
188
        controls.push(Control::new_stop::<F::ControlMode>(&literal_block));
2,990✔
189
    }
2,990✔
190

191
    Ok(controls)
101,298✔
192
}
101,298✔
193

194
/// Compress a data stream from a Reader to refpack format into a Writer.
195
///
196
/// First parameter is the length; allows for compressing an arbitrary block
197
/// length from any reader.
198
///
199
/// Second and third parameter are the pregenerated reader and destination
200
/// writ.er
201
///
202
/// # Example
203
///
204
/// ```Rust
205
/// use std::io::Cursor;
206
///
207
/// let mut input = Cursor::new(b"Hello World!");
208
/// let mut output = Cursor::new(Vec::new());
209
///
210
/// // Compress the input into the output
211
/// refpack::compress(input.len(), &mut input, &mut output);
212
/// // output now contains the compressed version of the input
213
/// ```
214
///
215
/// # Errors
216
///
217
/// Will return `Error::Io` if there is an IO error
218
/// Will return `Error::EmptyInput` if the length provided is 0
219
pub fn compress<F: Format>(
101,299✔
220
    length: usize,
101,299✔
221
    reader: &mut (impl Read + Seek),
101,299✔
222
    writer: &mut (impl Write + Seek),
101,299✔
223
) -> RefPackResult<()> {
101,299✔
224
    if length == 0 {
101,299✔
225
        return Err(RefPackError::EmptyInput);
1✔
226
    }
101,298✔
227

228
    let controls = encode_stream::<F>(reader, length)?;
101,298✔
229

230
    let header_length = F::HeaderMode::length(length);
101,298✔
231

232
    let header_position = writer.stream_position()?;
101,298✔
233
    let data_start_pos = writer.seek(SeekFrom::Current(header_length as i64))?;
101,298✔
234

235
    for control in controls {
309,418✔
236
        control.write::<F::ControlMode>(writer)?;
208,120✔
237
    }
238

239
    let data_end_pos = writer.stream_position()?;
101,298✔
240

241
    let compression_length = data_end_pos - data_start_pos;
101,298✔
242

101,298✔
243
    let header = Header {
101,298✔
244
        compressed_length: Some(compression_length as u32),
101,298✔
245
        decompressed_length: length as u32,
101,298✔
246
    };
101,298✔
247

101,298✔
248
    writer.seek(SeekFrom::Start(header_position))?;
101,298✔
249

250
    header.write::<F::HeaderMode>(writer)?;
101,298✔
251

252
    Ok(())
101,298✔
253
}
101,299✔
254

255
/// Wrapped compress function with a bit easier and cleaner of an API. Takes a
256
/// `&[u8]` slice of uncompressed bytes and returns a `Vec<u8>` of compressed
257
/// bytes
258
///
259
/// In implementation this just creates `Cursor`s for the reader and writer and
260
/// calls `compress`
261
///
262
/// Marked with `inline` so it should be inlined across crates and equivalent to
263
/// manually creating the cursors.
264
///
265
/// # Errors
266
///
267
/// Will return [RefPackError](crate::RefPackError) as relevant. All errors are
268
/// possible.
269
#[inline]
270
pub fn easy_compress<F: Format>(input: &[u8]) -> Result<Vec<u8>, RefPackError> {
101,299✔
271
    let mut reader = Cursor::new(input);
101,299✔
272
    let mut writer: Cursor<Vec<u8>> = Cursor::new(vec![]);
101,299✔
273
    compress::<F>(input.len(), &mut reader, &mut writer)?;
101,299✔
274
    Ok(writer.into_inner())
101,298✔
275
}
101,299✔
276

277
#[cfg(test)]
278
mod test {
279
    use proptest::prelude::*;
280
    use test_strategy::proptest;
281

282
    use super::*;
283
    use crate::format::Reference;
284

285
    #[proptest]
×
286
    #[ignore]
287
    fn large_input_compression(
288
        #[strategy(proptest::collection::vec(any::<u8>(), (100_000..=500_000)))] input: Vec<u8>,
×
289
    ) {
290
        let _unused = easy_compress::<Reference>(&input).unwrap();
291
    }
292

293
    #[test]
1✔
294
    fn empty_input_yields_error() {
1✔
295
        let input = vec![];
1✔
296
        let result = easy_compress::<Reference>(&input);
1✔
297
        assert!(result.is_err());
1✔
298
        assert!(matches!(result.unwrap_err(), RefPackError::EmptyInput));
1✔
299
    }
1✔
300
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc