• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

actioninja / refpack-rs / 20115958187

10 Dec 2025 10:56PM UTC coverage: 90.291% (-7.7%) from 97.965%
20115958187

push

github

actioninja
chore: Make longrunning CI not run in coverage!

1553 of 1720 relevant lines covered (90.29%)

359994.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.85
/src/data/compression/mod.rs
1
////////////////////////////////////////////////////////////////////////////////
2
// This Source Code Form is subject to the terms of the Mozilla Public         /
3
// License, v. 2.0. If a copy of the MPL was not distributed with this         /
4
// file, You can obtain one at https://mozilla.org/MPL/2.0/.                   /
5
//                                                                             /
6
////////////////////////////////////////////////////////////////////////////////
7

8
//! Compression scheme is heavily based on lz77. Exact compression algorithm may
9
//! be subject to change.
10
//!
11
//! Basic concept is to track literal bytes as you encounter them, and have some
12
//! way of identifying when current bytes match previously encountered
13
//! sequences.
14
//!
15
//! Current tracked literal bytes *must* be written before a back-reference
16
//! copy command is written
17
//!
18
//! Literal blocks have a max length of 112, and if this limit is reached
19
//! the literal sequence must be split into two (or more) blocks to properly
20
//! encode the literals
21
//!
22
//! Due to the limited precision of literal blocks, special handling is required
23
//! for writing literal blocks before copy or stop controls. The literal block
24
//! needs to be "split" to make the literal take an even multiple of 4 bytes.
25
//!
26
//! This is done by getting the modulus of the number of bytes modulo 4
27
//! and then subtracting this remainder from the total length.
28
//!
29
//! Simple pseudo-rust:
30
//! ```
31
//! let tracked_bytes_length = 117;
32
//! let num_bytes_in_copy = tracked_bytes_length % 4; // 1
33
//! let num_bytes_in_literal = 117 - num_bytes_in_copy; // 116; factors by 4
34
//! ```
35
//!
36
//! See [Command] for a specification of control codes
37
mod fast;
38
mod fastest;
39
pub(crate) mod match_length;
40
mod optimal;
41
pub(crate) mod prefix_search;
42

43
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
44

45
use crate::data::compression::fast::encode;
46
use crate::data::compression::optimal::{HASH_CHAINING_LEVELS, encode_slice_hc};
47
#[cfg(test)]
48
use crate::data::compression::prefix_search::hash_chain::HashChainPrefixSearcher;
49
use crate::data::compression::prefix_search::multi_level_hash_chain::MultiLevelPrefixSearcher;
50
use crate::data::control::{
51
    LONG_LENGTH_MAX,
52
    LONG_LENGTH_MIN,
53
    LONG_OFFSET_MAX,
54
    MEDIUM_LENGTH_MAX,
55
    MEDIUM_LENGTH_MIN,
56
    MEDIUM_OFFSET_MAX,
57
    SHORT_LENGTH_MAX,
58
    SHORT_OFFSET_MAX,
59
};
60
use crate::format::Format;
61
use crate::header::Header;
62
use crate::header::mode::Mode as HeaderMode;
63
use crate::{RefPackError, RefPackResult};
64

65
// used in both fast and high compression algorithms
66
fn bytes_for_match(length: usize, offset: usize) -> Option<(Option<usize>, usize)> {
1,077,508✔
67
    if offset > LONG_OFFSET_MAX as usize {
1,077,508✔
68
        return None;
×
69
    }
1,077,508✔
70
    if length >= LONG_LENGTH_MIN as usize {
1,077,508✔
71
        if length > MEDIUM_LENGTH_MAX as usize || offset > MEDIUM_OFFSET_MAX as usize {
456,429✔
72
            Some((Some(4), LONG_LENGTH_MAX as usize))
×
73
        } else if length > SHORT_LENGTH_MAX as usize || offset > SHORT_OFFSET_MAX as usize {
456,429✔
74
            Some((Some(3), MEDIUM_LENGTH_MAX as usize))
31,074✔
75
        } else {
76
            Some((Some(2), SHORT_LENGTH_MAX as usize))
425,355✔
77
        }
78
    } else if offset <= SHORT_OFFSET_MAX as usize {
621,079✔
79
        Some((Some(2), SHORT_LENGTH_MAX as usize))
621,068✔
80
    } else if offset <= MEDIUM_OFFSET_MAX as usize {
11✔
81
        if length >= MEDIUM_LENGTH_MIN as usize {
11✔
82
            Some((Some(3), MEDIUM_LENGTH_MAX as usize))
×
83
        } else {
84
            Some((None, MEDIUM_LENGTH_MIN as usize - 1))
11✔
85
        }
86
    } else {
87
        Some((None, LONG_LENGTH_MIN as usize - 1))
×
88
    }
89
}
1,077,508✔
90

91
/// Indicates preferences towards compressed size vs compression speed
92
///
93
/// refpack-rs has several algorithms for compression that have different use cases,
94
/// you may want to use an algorithm that is fast to run but gives suboptimal results,
95
/// or you could use an algorithm that is very slow to run but gives optimal results.
96
///
97
/// The default is the [Fast](CompressionOptions::Fast) algorithm.
98
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)]
99
#[non_exhaustive]
100
#[cfg_attr(test, derive(test_strategy::Arbitrary))]
101
pub enum CompressionOptions {
102
    /// Fastest algorithm intended for real-time use cases
103
    ///
104
    /// Expect speeds of 50-100MB/s, with compression ratios
105
    /// that are around 10-20% worse than the [Optimal](CompressionOptions::Optimal) algorithm.
106
    Fastest,
107
    #[default]
108
    /// Fast algorithm intended for data that is read a small amount of times
109
    ///
110
    /// Expect speeds of 5-10MB/s, with compression ratios
111
    /// that are around 5-10% worse than the [Optimal](CompressionOptions::Optimal) algorithm.
112
    Fast,
113
    /// Optimal algorithm intended for data that is read a large amount of times
114
    ///
115
    /// Expect speeds of 1-2MB/s
116
    ///
117
    /// This algorithm results in compression that is mathematically guaranteed to
118
    /// produce the smallest possible output for refpack compression.
119
    Optimal,
120
    /// DO NOT USE IN PRODUCTION
121
    ///
122
    /// Produces exactly the same output as [Optimal](CompressionOptions::Optimal),
123
    /// but uses a reference implementation for the internal prefix searcher.
124
    /// See [test::optimal_matches_reference] for details.
125
    #[cfg(test)]
126
    OptimalReference,
127
}
128

129
/// Compress a data stream from a Reader to refpack format into a Writer.
130
///
131
/// First parameter is the length; allows for compressing an arbitrary block
132
/// length from any reader.
133
///
134
/// Second and third parameter are the pregenerated reader and destination
135
/// writer
136
///
137
/// # Example
138
///
139
/// ```Rust
140
/// use std::io::Cursor;
141
///
142
/// let mut input = Cursor::new(b"Hello World!");
143
/// let mut output = Cursor::new(Vec::new());
144
///
145
/// // Compress the input into the output
146
/// refpack::compress(input.len(), &mut input, &mut output);
147
/// // output now contains the compressed version of the input
148
/// ```
149
///
150
/// # Errors
151
/// - [RefPackError::EmptyInput]: Length provided is 0
152
/// - [RefPackError::Io]: Generic IO error when reading or writing
153
pub fn compress<F: Format>(
258✔
154
    length: usize,
258✔
155
    reader: &mut (impl Read + Seek),
258✔
156
    writer: &mut (impl Write + Seek),
258✔
157
    compression_options: CompressionOptions,
258✔
158
) -> RefPackResult<()> {
258✔
159
    let mut buf = vec![0; length];
258✔
160
    reader.read_exact(buf.as_mut_slice())?;
258✔
161
    let out = easy_compress::<F>(&buf, compression_options)?;
258✔
162
    writer.write_all(&out)?;
258✔
163
    Ok(())
258✔
164
}
258✔
165

166
/// Wrapped compress function with a bit easier and cleaner of an API. Takes a
167
/// `&[u8]` slice of uncompressed bytes and returns a `Vec<u8>` of compressed
168
/// bytes
169
///
170
/// In implementation this just creates `Cursor`s for the reader and writer and
171
/// calls `compress`
172
///
173
/// Marked with `inline` so it should be inlined across crates and equivalent to
174
/// manually creating the cursors.
175
///
176
/// # Errors
177
/// - [RefPackError::EmptyInput]: Length provided is 0
178
/// - [RefPackError::Io]: Generic IO error when reading or writing
179
#[inline]
180
pub fn easy_compress<F: Format>(
101,308✔
181
    input: &[u8],
101,308✔
182
    compression_options: CompressionOptions,
101,308✔
183
) -> Result<Vec<u8>, RefPackError> {
101,308✔
184
    let mut writer: Cursor<Vec<u8>> = Cursor::new(vec![]);
101,308✔
185

186
    let length = input.len();
101,308✔
187

188
    if input.is_empty() {
101,308✔
189
        return Err(RefPackError::EmptyInput);
1✔
190
    }
101,307✔
191

192
    let controls = match compression_options {
101,307✔
193
        CompressionOptions::Fastest => fastest::encode(input),
25,464✔
194
        CompressionOptions::Fast => encode(input),
25,385✔
195
        CompressionOptions::Optimal => {
196
            encode_slice_hc::<MultiLevelPrefixSearcher<{ HASH_CHAINING_LEVELS }>>(input)
25,186✔
197
        }
198
        #[cfg(test)]
199
        CompressionOptions::OptimalReference => encode_slice_hc::<HashChainPrefixSearcher>(input),
25,272✔
200
    };
201

202
    let header_length = F::HeaderMode::length(length);
101,307✔
203

204
    let header_position = writer.stream_position()?;
101,307✔
205
    let data_start_pos = writer.seek(SeekFrom::Current(header_length as i64))?;
101,307✔
206

207
    for control in controls {
365,341✔
208
        control.write(&mut writer)?;
264,034✔
209
    }
210

211
    let data_end_pos = writer.stream_position()?;
101,307✔
212

213
    let compression_length = data_end_pos - data_start_pos;
101,307✔
214

215
    let header = Header {
101,307✔
216
        compressed_length: Some(compression_length as u32),
101,307✔
217
        decompressed_length: length as u32,
101,307✔
218
    };
101,307✔
219

220
    writer.seek(SeekFrom::Start(header_position))?;
101,307✔
221

222
    header.write::<F::HeaderMode>(&mut writer)?;
101,307✔
223
    Ok(writer.into_inner())
101,307✔
224
}
101,308✔
225

226
#[cfg(test)]
227
mod test {
228
    use proptest::prelude::*;
229
    use test_strategy::proptest;
230

231
    use super::*;
232
    use crate::format::Reference;
233

234
    #[proptest]
235
    #[ignore]
236
    fn large_input_compression(
237
        #[strategy(proptest::collection::vec(any::< u8 > (), 100_000..=500_000))] input: Vec<u8>,
238
        #[strategy(any::<CompressionOptions>())] options: CompressionOptions,
239
    ) {
240
        let _unused = easy_compress::<Reference>(&input, options).unwrap();
241
    }
242

243
    #[proptest]
244
    fn reader_compression(
245
        #[strategy(proptest::collection::vec(any::< u8 > (), 1..=256))] input: Vec<u8>,
246
        #[strategy(any::<CompressionOptions>())] options: CompressionOptions,
247
    ) {
248
        let length = input.len();
249
        let mut in_cursor = Cursor::new(input);
250
        let mut out_cursor = Cursor::new(Vec::new());
251
        compress::<Reference>(length, &mut in_cursor, &mut out_cursor, options).unwrap();
252
    }
253

254
    #[test]
255
    fn empty_input_yields_error() {
1✔
256
        let input = vec![];
1✔
257
        let result = easy_compress::<Reference>(&input, CompressionOptions::Fast);
1✔
258
        assert!(result.is_err());
1✔
259
        assert!(matches!(result.unwrap_err(), RefPackError::EmptyInput));
1✔
260
    }
1✔
261

262
    #[proptest]
263
    #[ignore]
264
    fn optimal_matches_reference(
265
        #[strategy(proptest::collection::vec(0..=3u8, 1..=1_000_000))] input: Vec<u8>,
266
    ) {
267
        println!("in: {}", input.len());
268
        let compressed_reference =
269
            easy_compress::<Reference>(&input, CompressionOptions::OptimalReference)?;
270
        let compressed_optimal = easy_compress::<Reference>(&input, CompressionOptions::Optimal)?;
271
        println!(
272
            "out ref: {} opt: {}",
273
            compressed_reference.len(),
274
            compressed_optimal.len()
275
        );
276
        prop_assert_eq!(
277
            &compressed_reference,
278
            &compressed_optimal,
279
            "Optimal compression should match the reference implementation."
280
        );
281
    }
282
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc