• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

actioninja / refpack-rs / 6689328355

30 Oct 2023 06:54AM UTC coverage: 89.345% (-3.0%) from 92.374%
6689328355

Pull #6

github

web-flow
Merge 6f3e6b894 into f55f4d390
Pull Request #6: Demoded controls

294 of 294 new or added lines in 7 files covered. (100.0%)

914 of 1023 relevant lines covered (89.35%)

154499.47 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.21
/src/data/compression.rs
1
////////////////////////////////////////////////////////////////////////////////
2
// This Source Code Form is subject to the terms of the Mozilla Public         /
3
// License, v. 2.0. If a copy of the MPL was not distributed with this         /
4
// file, You can obtain one at https://mozilla.org/MPL/2.0/.                   /
5
//                                                                             /
6
////////////////////////////////////////////////////////////////////////////////
7

8
//! Compression scheme is heavily based on lz77. Exact compression algorithm may
9
//! be subject to change.
10
//!
11
//! Basic concept is to track literal bytes as you encounter them, and have some
12
//! way of identifying when current bytes match previously encountered
13
//! sequences.
14
//!
15
//! Current tracked literal bytes *must* be written before a back-reference
16
//! copy command is written
17
//!
18
//! Literal blocks have a max length of 112, and if this limit is reached
19
//! the literal sequence must be split into two (or more) blocks to properly
20
//! encode the literals
21
//!
22
//! Due to the limited precision of literal blocks, special handling is required
23
//! for writing literal blocks before copy or stop controls. The literal block
24
//! needs to be "split" to make the literal take an even multiple of 4 bytes.
25
//!
26
//! This is done by getting the modulus of the number of bytes modulo 4
27
//! and then subtracting this remainder from the total length.
28
//!
29
//! Simple pseudo-rust:
30
//! ```
31
//! let tracked_bytes_length = 117;
32
//! let num_bytes_in_copy = tracked_bytes_length % 4; // 1
33
//! let num_bytes_in_literal = 117 - num_bytes_in_copy; // 116; factors by 4
34
//! ```
35
//!
36
//! See [Command] for a specification of control codes
37
use std::cmp::max;
38
use std::collections::HashMap;
39
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
40

41
use crate::data::control::{
42
    Command,
43
    Control,
44
    COPY_LITERAL_MAX,
45
    LITERAL_MAX,
46
    LONG_LENGTH_MAX,
47
    LONG_LENGTH_MIN,
48
    LONG_OFFSET_MAX,
49
    MEDIUM_LENGTH_MIN,
50
    MEDIUM_OFFSET_MAX,
51
    SHORT_OFFSET_MAX,
52
    SHORT_OFFSET_MIN,
53
};
54
use crate::format::Format;
55
use crate::header::mode::Mode as HeaderMode;
56
use crate::header::Header;
57
use crate::{RefPackError, RefPackResult};
58

59
// Optimization trick from libflate_lz77
60
// Faster lookups for very large tables
61
#[derive(Debug)]
×
62
enum PrefixTable {
63
    Small(HashMap<[u8; 3], u32>),
64
    Large(LargePrefixTable),
65
}
66

67
impl PrefixTable {
68
    fn new(bytes: usize) -> Self {
101,038✔
69
        if bytes < LONG_OFFSET_MAX as usize {
101,038✔
70
            PrefixTable::Small(HashMap::new())
101,038✔
71
        } else {
72
            PrefixTable::Large(LargePrefixTable::new())
×
73
        }
74
    }
101,038✔
75

76
    fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
5,606,496✔
77
        match *self {
5,606,496✔
78
            PrefixTable::Small(ref mut table) => table.insert(prefix, position),
5,606,496✔
79
            PrefixTable::Large(ref mut table) => table.insert(prefix, position),
×
80
        }
81
    }
5,606,496✔
82
}
83

84
#[derive(Debug)]
×
85
struct LargePrefixTable {
86
    table: Vec<Vec<(u8, u32)>>,
87
}
88

89
impl LargePrefixTable {
90
    fn new() -> Self {
×
91
        LargePrefixTable {
×
92
            table: (0..=0xFFFF).map(|_| Vec::new()).collect(),
×
93
        }
×
94
    }
×
95

96
    fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
×
97
        let p0 = prefix[0] as usize;
×
98
        let p1 = prefix[1] as usize;
×
99
        let p2 = prefix[2];
×
100

×
101
        let index = (p0 << 8) | p1;
×
102
        let positions = &mut self.table[index];
×
103
        for &mut (key, ref mut value) in &mut *positions {
×
104
            if key == p2 {
×
105
                let old = *value;
×
106
                *value = position;
×
107
                return Some(old);
×
108
            }
×
109
        }
110
        positions.push((p2, position));
×
111
        None
×
112
    }
×
113
}
114

115
fn prefix(input_buf: &[u8]) -> [u8; 3] {
5,606,496✔
116
    let buf: &[u8] = &input_buf[..3];
5,606,496✔
117
    [buf[0], buf[1], buf[2]]
5,606,496✔
118
}
5,606,496✔
119

120
/// Reads from an incoming `Read` reader and compresses and encodes to
121
/// `Vec<Control>`
122
pub(crate) fn encode_stream(
101,038✔
123
    reader: &mut (impl Read + Seek),
101,038✔
124
    length: usize,
101,038✔
125
) -> Result<Vec<Control>, RefPackError> {
101,038✔
126
    let mut in_buffer = vec![0_u8; length];
101,038✔
127
    reader.read_exact(&mut in_buffer)?;
101,038✔
128
    let mut controls: Vec<Control> = vec![];
101,038✔
129
    let mut prefix_table = PrefixTable::new(in_buffer.len());
101,038✔
130

101,038✔
131
    let mut i = 0;
101,038✔
132
    let end = max(3, in_buffer.len()) - 3;
101,038✔
133
    let mut literal_block: Vec<u8> = Vec::with_capacity(LITERAL_MAX as usize);
101,038✔
134
    while i < end {
5,707,450✔
135
        let key = prefix(&in_buffer[i..]);
5,606,412✔
136

5,606,412✔
137
        // get the position of the prefix in the table (if it exists)
5,606,412✔
138
        let matched = prefix_table.insert(key, i as u32);
5,606,412✔
139

5,606,412✔
140
        let pair = matched.map(|x| x as usize).and_then(|matched| {
5,606,412✔
141
            let distance = i - matched;
50✔
142
            if distance > LONG_OFFSET_MAX as usize || distance < SHORT_OFFSET_MIN as usize {
50✔
143
                None
×
144
            } else {
145
                // find the longest common prefix
146
                let max_copy_len = LONG_LENGTH_MAX as usize;
50✔
147
                let match_length = in_buffer[i..]
50✔
148
                    .iter()
50✔
149
                    .take(max_copy_len - 3)
50✔
150
                    .zip(&in_buffer[matched..])
50✔
151
                    .take_while(|(a, b)| a == b)
201✔
152
                    .count();
50✔
153

50✔
154
                // Insufficient similarity for given distance, reject
50✔
155
                if (match_length <= MEDIUM_LENGTH_MIN as usize
50✔
156
                    && distance > SHORT_OFFSET_MAX as usize)
50✔
157
                    || (match_length <= LONG_LENGTH_MIN as usize
42✔
158
                        && distance > MEDIUM_OFFSET_MAX as usize)
42✔
159
                {
160
                    None
8✔
161
                } else {
162
                    Some((matched, match_length))
42✔
163
                }
164
            }
165
        });
5,606,412✔
166

167
        if let Some((found, match_length)) = pair {
5,606,412✔
168
            let distance = i - found;
42✔
169

42✔
170
            // If the current literal block is longer than the copy limit we need to split
42✔
171
            // the block
42✔
172
            if literal_block.len() > COPY_LITERAL_MAX as usize {
42✔
173
                let split_point: usize = literal_block.len() - (literal_block.len() % 4);
41✔
174
                controls.push(Control::new_literal_block(&literal_block[..split_point]));
41✔
175
                let second_block = &literal_block[split_point..];
41✔
176
                controls.push(Control::new(
41✔
177
                    Command::new(distance, match_length, second_block.len()),
41✔
178
                    second_block.to_vec(),
41✔
179
                ));
41✔
180
            } else {
41✔
181
                // If it's not, just push a new block directly
1✔
182
                controls.push(Control::new(
1✔
183
                    Command::new(distance, match_length, literal_block.len()),
1✔
184
                    literal_block.clone(),
1✔
185
                ));
1✔
186
            }
1✔
187
            literal_block.clear();
42✔
188

189
            for k in (i..).take(match_length).skip(1) {
84✔
190
                if k >= end {
84✔
191
                    break;
×
192
                }
84✔
193
                prefix_table.insert(prefix(&in_buffer[k..]), k as u32);
84✔
194
            }
195

196
            i += match_length;
42✔
197
        } else {
198
            literal_block.push(in_buffer[i]);
5,606,370✔
199
            i += 1;
5,606,370✔
200
            // If it's reached the limit, push the block immediately and clear the running
5,606,370✔
201
            // block
5,606,370✔
202
            if literal_block.len() >= (LITERAL_MAX as usize) {
5,606,370✔
203
                controls.push(Control::new_literal_block(&literal_block));
7,419✔
204
                literal_block.clear();
7,419✔
205
            }
5,598,951✔
206
        }
207
    }
208
    // Add remaining literals if there are any
209
    if i < in_buffer.len() {
101,038✔
210
        literal_block.extend_from_slice(&in_buffer[i..]);
101,038✔
211
    }
101,038✔
212
    // Extremely similar to block up above, but with a different control type
213
    if literal_block.len() > 3 {
101,038✔
214
        let split_point: usize = literal_block.len() - (literal_block.len() % 4);
97,932✔
215
        controls.push(Control::new_literal_block(&literal_block[..split_point]));
97,932✔
216
        controls.push(Control::new_stop(&literal_block[split_point..]));
97,932✔
217
    } else {
97,932✔
218
        controls.push(Control::new_stop(&literal_block));
3,106✔
219
    }
3,106✔
220

221
    Ok(controls)
101,038✔
222
}
101,038✔
223

224
/// Compress a data stream from a Reader to refpack format into a Writer.
225
///
226
/// First parameter is the length; allows for compressing an arbitrary block
227
/// length from any reader.
228
///
229
/// Second and third parameter are the pregenerated reader and destination
230
/// writ.er
231
///
232
/// # Example
233
///
234
/// ```Rust
235
/// use std::io::Cursor;
236
///
237
/// let mut input = Cursor::new(b"Hello World!");
238
/// let mut output = Cursor::new(Vec::new());
239
///
240
/// // Compress the input into the output
241
/// refpack::compress(input.len(), &mut input, &mut output);
242
/// // output now contains the compressed version of the input
243
/// ```
244
///
245
/// # Errors
246
/// - [RefPackError::EmptyInput]: Length provided is 0
247
/// - [RefPackError::Io]: Generic IO error when reading or writing
248
pub fn compress<F: Format>(
101,039✔
249
    length: usize,
101,039✔
250
    reader: &mut (impl Read + Seek),
101,039✔
251
    writer: &mut (impl Write + Seek),
101,039✔
252
) -> RefPackResult<()> {
101,039✔
253
    if length == 0 {
101,039✔
254
        return Err(RefPackError::EmptyInput);
1✔
255
    }
101,038✔
256

257
    let controls = encode_stream(reader, length)?;
101,038✔
258

259
    let header_length = F::HeaderMode::length(length);
101,038✔
260

261
    let header_position = writer.stream_position()?;
101,038✔
262
    let data_start_pos = writer.seek(SeekFrom::Current(header_length as i64))?;
101,038✔
263

264
    for control in controls {
307,510✔
265
        control.write(writer)?;
206,472✔
266
    }
267

268
    let data_end_pos = writer.stream_position()?;
101,038✔
269

270
    let compression_length = data_end_pos - data_start_pos;
101,038✔
271

101,038✔
272
    let header = Header {
101,038✔
273
        compressed_length: Some(compression_length as u32),
101,038✔
274
        decompressed_length: length as u32,
101,038✔
275
    };
101,038✔
276

101,038✔
277
    writer.seek(SeekFrom::Start(header_position))?;
101,038✔
278

279
    header.write::<F::HeaderMode>(writer)?;
101,038✔
280

281
    Ok(())
101,038✔
282
}
101,039✔
283

284
/// Wrapped compress function with a bit easier and cleaner of an API. Takes a
285
/// `&[u8]` slice of uncompressed bytes and returns a `Vec<u8>` of compressed
286
/// bytes
287
///
288
/// In implementation this just creates `Cursor`s for the reader and writer and
289
/// calls `compress`
290
///
291
/// Marked with `inline` so it should be inlined across crates and equivalent to
292
/// manually creating the cursors.
293
///
294
/// # Errors
295
/// - [RefPackError::EmptyInput]: Length provided is 0
296
/// - [RefPackError::Io]: Generic IO error when reading or writing
297
#[inline]
298
pub fn easy_compress<F: Format>(input: &[u8]) -> Result<Vec<u8>, RefPackError> {
101,039✔
299
    let mut reader = Cursor::new(input);
101,039✔
300
    let mut writer: Cursor<Vec<u8>> = Cursor::new(vec![]);
101,039✔
301
    compress::<F>(input.len(), &mut reader, &mut writer)?;
101,039✔
302
    Ok(writer.into_inner())
101,038✔
303
}
101,039✔
304

305
#[cfg(test)]
306
mod test {
307
    use proptest::prelude::*;
308
    use test_strategy::proptest;
309

310
    use super::*;
311
    use crate::format::Reference;
312

313
    #[proptest]
×
314
    #[ignore]
315
    fn large_input_compression(
316
        #[strategy(proptest::collection::vec(any::<u8>(), (100_000..=500_000)))] input: Vec<u8>,
×
317
    ) {
318
        let _unused = easy_compress::<Reference>(&input).unwrap();
319
    }
320

321
    #[test]
1✔
322
    fn empty_input_yields_error() {
1✔
323
        let input = vec![];
1✔
324
        let result = easy_compress::<Reference>(&input);
1✔
325
        assert!(result.is_err());
1✔
326
        assert!(matches!(result.unwrap_err(), RefPackError::EmptyInput));
1✔
327
    }
1✔
328
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc