• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 2591

30 Aug 2024 01:09AM UTC coverage: 91.107% (-0.01%) from 91.117%
2591

push

Evergreen

web-flow
Add basic translation of NSExceptions to Status (#8010)

When the Swift API is misused from within a callback from core, an NSException
may escape, which we want to propagate. Since this is always a fatal error that
cannot be programmatically handled by the user, we can always translate this to
UnknownError rather than trying to preserve the original error code.

102828 of 181618 branches covered (56.62%)

1 of 1 new or added line in 1 file covered. (100.0%)

45 existing lines in 13 files now uncovered.

217355 of 238570 relevant lines covered (91.11%)

5738574.33 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.62
/src/realm/util/compression.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2022 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <realm/util/compression.hpp>
20
#include <realm/util/safe_int_ops.hpp>
21
#include <realm/util/scope_exit.hpp>
22

23
#include <cstring>
24
#include <limits>
25
#include <map>
26
#include <zlib.h>
27
#include <zconf.h> // for zlib
28

29
#if REALM_USE_LIBCOMPRESSION
30
#include <compression.h>
31
#include <os/availability.h>
32
#endif
33

34
using namespace realm;
35
using namespace util;
36

37
namespace {
38

39
// Identifies how a payload was encoded. The numeric value is what
// write_header() stores in the high nibble of the first header byte and what
// read_header() decodes from it, so changing a value changes the encoded
// format.
enum class Algorithm {
    None = 0,    // payload is stored as-is
    Deflate = 1, // payload is a DEFLATE stream
    Lzfse = 2,   // payload is an LZFSE stream
};
44

45
using stream_avail_size_t = std::conditional_t<sizeof(uInt) < sizeof(size_t), uInt, size_t>;
46
constexpr stream_avail_size_t g_max_stream_avail = std::numeric_limits<stream_avail_size_t>::max();
47

48
stream_avail_size_t bounded_avail(size_t s)
49
{
119,250✔
50
    return s > g_max_stream_avail ? g_max_stream_avail : stream_avail_size_t(s);
119,250✔
51
}
119,250✔
52

53
// Reinterpret a character buffer as the mutable Bytef pointer that the
// stream structs used in this file expect. Constness is cast away.
Bytef* to_bytef(const char* str)
{
    char* writable = const_cast<char*>(str);
    return reinterpret_cast<Bytef*>(writable);
}
200,205✔
57

58
class ErrorCategoryImpl : public std::error_category {
59
public:
60
    const char* name() const noexcept override final
61
    {
×
62
        return "realm::util::compression::error";
×
63
    }
×
64
    std::string message(int err) const override final
65
    {
×
66
        using error = realm::util::compression::error;
×
67
        error e = error(err);
×
68
        switch (e) {
×
69
            case error::out_of_memory:
×
70
                return "Out of memory";
×
71
            case error::compress_buffer_too_small:
×
72
                return "Compression buffer too small";
×
73
            case error::compress_error:
×
74
                return "Compression error";
×
75
            case error::compress_input_too_long:
×
76
                return "Compression input too long";
×
77
            case error::corrupt_input:
×
78
                return "Corrupt input data";
×
79
            case error::incorrect_decompressed_size:
×
80
                return "Decompressed data size not equal to expected size";
×
81
            case error::decompress_error:
×
82
                return "Decompression error";
×
83
            case error::decompress_unsupported:
×
84
                return "Decompression failed due to unsupported input compression";
×
85
        }
×
86
        REALM_UNREACHABLE();
87
    }
×
88
};
89

90
ErrorCategoryImpl g_error_category;
91

92
void* custom_alloc(void* opaque, unsigned int cnt, unsigned int size)
93
{
134,355✔
94
    using Alloc = realm::util::compression::Alloc;
134,355✔
95
    Alloc& alloc = *static_cast<Alloc*>(opaque);
134,355✔
96
    std::size_t accum_size = cnt * std::size_t(size);
134,355✔
97
    return alloc.alloc(accum_size);
134,355✔
98
}
134,355✔
99

100
void custom_free(void* opaque, void* addr)
101
{
134,355✔
102
    using Alloc = realm::util::compression::Alloc;
134,355✔
103
    Alloc& alloc = *static_cast<Alloc*>(opaque);
134,355✔
104
    return alloc.free(addr);
134,355✔
105
}
134,355✔
106

107
// Prepare the arena for a compression run: an arena that already has storage
// is reset, an empty one gets its initial allocation.
void init_arena(compression::CompressMemoryArena& compress_memory_arena)
{
    if (compress_memory_arena.size() != 0) {
        compress_memory_arena.reset();
        return;
    }
    // Zlib documentation says that with default settings deflate requires
    // at most 268 KB. We round up slightly.
    compress_memory_arena.resize(270 * 1024); // Throws
}
59,892✔
118

119
// Double the arena size, saturating at the largest std::size_t value.
// The arena must be non-empty and not already at that maximum.
void grow_arena(compression::CompressMemoryArena& compress_memory_arena)
{
    constexpr std::size_t max_size = std::numeric_limits<std::size_t>::max();

    std::size_t new_size = compress_memory_arena.size();
    REALM_ASSERT(new_size != 0);
    REALM_ASSERT(new_size != max_size);

    // NOTE(review): presumably multiplies `new_size` in place and returns true
    // on overflow -- confirm against safe_int_ops.hpp.
    const bool overflowed = util::int_multiply_with_overflow_detect(new_size, 2);
    if (overflowed) {
        new_size = max_size;
    }
    compress_memory_arena.resize(new_size); // Throws
}
21,624✔
128

129
// Pop one byte off the front of `buf`, first refilling `buf` from `is` when it
// is empty. Returns 0 when the refill yields no data, so callers cannot tell
// end-of-input apart from a literal zero byte.
uint8_t read_byte(InputStream& is, Span<const char>& buf)
{
    if (buf.size() == 0) {
        buf = is.next_block();
    }
    if (buf.size() == 0) {
        return 0;
    }
    const char head = buf.front();
    buf = buf.sub_span(1);
    return head;
}
740,886✔
140

141
struct Header {
142
    Algorithm algorithm;
143
    size_t size;
144
};
145

146
Header read_header(InputStream& is, Span<const char>& buf)
147
{
359,190✔
148
    Header ret = {};
359,190✔
149
    auto first_byte = read_byte(is, buf);
359,190✔
150
    ret.algorithm = Algorithm(first_byte >> 4);
359,190✔
151
    size_t size_width = first_byte & 0b1111;
359,190✔
152
    if (size_width > sizeof(size_t))
359,190✔
153
        ret.size = -1;
12✔
154
    else {
359,178✔
155
        for (size_t i = 0; i < size_width; ++i) {
724,824✔
156
            ret.size += size_t(read_byte(is, buf)) << (i * 8);
365,646✔
157
        }
365,646✔
158
    }
359,178✔
159
    return ret;
359,190✔
160
}
359,190✔
161

162
// Number of bytes a header occupies for the given size value: one leading
// byte plus one byte per non-zero-prefixed octet of `size`.
uint8_t header_width(size_t size)
{
    uint8_t size_bytes = 0;
    for (size_t rest = size; rest != 0; rest >>= 8) {
        ++size_bytes;
    }
    return static_cast<uint8_t>(size_bytes + 1);
}
154,008✔
171

172
// Serialize `h` at the start of `target` and return the number of bytes
// written. Byte 0 holds the algorithm in its high nibble and the count of size
// bytes in its low nibble; the size bytes follow, least significant first.
// `target` is not bounds-checked here.
size_t write_header(Header h, Span<char> target)
{
    size_t size_bytes = 0;
    for (size_t rest = h.size; rest != 0; rest >>= 8) {
        ++size_bytes;
        target[size_bytes] = uint8_t(rest & 0xFF);
    }
    // size_bytes never exceeds sizeof(size_t), so it fits in the low nibble
    // and adding it is equivalent to incrementing byte 0 once per size byte.
    target[0] = static_cast<char>((uint8_t(h.algorithm) << 4) + size_bytes);
    return size_bytes + 1;
}
165,291✔
182

183
// Feed in a zlib header to inflate() for the places we don't store it
184
void inflate_zlib_header(z_stream& strm)
185
{
7,248✔
186
    Bytef out;
7,248✔
187
    strm.avail_in = 2;
7,248✔
188
    strm.next_in = to_bytef("\x78\x5e");
7,248✔
189
    strm.avail_out = sizeof(out);
7,248✔
190
    strm.next_out = &out;
7,248✔
191

192
    int rc = inflate(&strm, Z_SYNC_FLUSH);
7,248✔
193
    REALM_ASSERT(rc == Z_OK);
7,248✔
194
    REALM_ASSERT(strm.avail_in == 0);
7,248✔
195
}
7,248✔
196

197
struct DecompressInputStreamNone final : public InputStream {
198
    DecompressInputStreamNone(InputStream& s, Span<const char> b)
199
        : source(s)
43,035✔
200
        , current_block(b)
43,035✔
201
    {
109,035✔
202
        if (current_block.empty())
109,035✔
203
            current_block = source.next_block();
573✔
204
    }
109,035✔
205
    InputStream& source;
206
    Span<const char> current_block;
207

208
    Span<const char> next_block() override
209
    {
359,031✔
210
        auto ret = current_block;
359,031✔
211
        if (ret.size())
359,031✔
212
            current_block = source.next_block();
250,011✔
213
        return ret;
359,031✔
214
    }
359,031✔
215
};
216

217
class DecompressInputStreamZlib final : public InputStream {
218
public:
219
    DecompressInputStreamZlib(InputStream& s, Span<const char> b, size_t total_size)
220
        : m_source(s)
1,500✔
221
    {
1,500✔
222
        // Arbitrary upper limit to reduce peak memory usage
223
        constexpr const size_t max_out_buffer_size = 1024 * 1024;
1,500✔
224
        m_buffer.reserve(std::min(total_size, max_out_buffer_size));
1,500✔
225

226
        int rc = inflateInit(&m_strm);
1,500✔
227
        if (rc != Z_OK)
1,500✔
228
            throw std::system_error(make_error_code(compression::error::decompress_error), m_strm.msg);
×
229
        inflate_zlib_header(m_strm);
1,500✔
230

231
        m_strm.avail_in = bounded_avail(b.size());
1,500✔
232
        m_strm.next_in = to_bytef(b.data());
1,500✔
233
        m_current_block = b.sub_span(m_strm.avail_in);
1,500✔
234
    }
1,500✔
235

236
    ~DecompressInputStreamZlib()
237
    {
1,500✔
238
        inflateEnd(&m_strm);
1,500✔
239
    }
1,500✔
240

241
    Span<const char> next_block() override
242
    {
4,911✔
243
        m_buffer.resize(m_buffer.capacity());
4,911✔
244
        m_strm.avail_out = bounded_avail(m_buffer.size());
4,911✔
245
        m_strm.next_out = to_bytef(m_buffer.data());
4,911✔
246

247
        while (true) {
5,307✔
248
            // We may have some leftover input buffer from a previous call if the
249
            // inflated result didn't fit in the output buffer. If not, we need to
250
            // fetch the next block.
251
            if (m_strm.avail_in == 0) {
5,307✔
252
                m_current_block = m_source.next_block();
2,586✔
253
                if (m_current_block.size()) {
2,586✔
254
                    m_strm.next_in = to_bytef(m_current_block.data());
1,098✔
255
                    m_strm.avail_in = bounded_avail(m_current_block.size());
1,098✔
256
                }
1,098✔
257
            }
2,586✔
258

259
            m_strm.total_out = 0;
5,307✔
260
            auto rc = inflate(&m_strm, m_strm.avail_in ? Z_SYNC_FLUSH : Z_FINISH);
5,307✔
261
            REALM_ASSERT(rc == Z_OK || rc == Z_STREAM_END || rc == Z_BUF_ERROR);
5,307!
262

263
            if (m_strm.total_out) {
5,307✔
264
                // We got some output, so return that. We might also have reached
265
                // the end of the stream, which'll be reported on the next call
266
                // if so.
267
                REALM_ASSERT(m_strm.total_out <= m_buffer.capacity());
3,411✔
268
                m_buffer.resize(m_strm.total_out);
3,411✔
269
                return m_buffer;
3,411✔
270
            }
3,411✔
271

272
            if (rc != Z_OK) {
1,896✔
273
                // We reached the end of the stream without producing more data, so
274
                // we're done.
275
                return {nullptr, nullptr};
1,500✔
276
            }
1,500✔
277

278
            // Otherwise we produced no output but also didn't reach the end of the
279
            // stream, so we need to feed more data in.
280
        }
1,896✔
281
    }
4,911✔
282

283
private:
284
    InputStream& m_source;
285
    Span<const char> m_current_block;
286
    z_stream m_strm = {};
287
    AppendBuffer<char> m_buffer;
288
};
289

290
#if REALM_USE_LIBCOMPRESSION
291

292
// Translate our Algorithm into the matching libcompression constant. Anything
// other than Deflate or Lzfse maps to a zero value, which callers treat as
// "unsupported".
compression_algorithm algorithm_to_compression_algorithm(Algorithm a)
{
    if (a == Algorithm::Deflate) {
        return COMPRESSION_ZLIB;
    }
    if (a == Algorithm::Lzfse) {
        return COMPRESSION_LZFSE;
    }
    return static_cast<compression_algorithm>(0);
}
15,996✔
303

304
API_AVAILABLE_BEGIN(macos(10.11))
305

306
class DecompressInputStreamLibCompression final : public InputStream {
307
public:
308
    DecompressInputStreamLibCompression(InputStream& s, Span<const char> b, Header h)
309
        : m_source(s)
310
    {
1,494✔
311
        // Arbitrary upper limit to reduce peak memory usage
312
        constexpr const size_t max_out_buffer_size = 1024 * 1024;
1,494✔
313
        m_buffer.reserve(std::min(h.size, max_out_buffer_size));
1,494✔
314
        auto rc = compression_stream_init(&m_strm, COMPRESSION_STREAM_DECODE,
1,494✔
315
                                          algorithm_to_compression_algorithm(h.algorithm));
1,494✔
316
        if (rc != COMPRESSION_STATUS_OK)
1,494✔
317
            throw std::system_error(compression::error::decompress_error);
318
        m_strm.src_size = b.size();
1,494✔
319
        m_strm.src_ptr = to_bytef(b.data());
1,494✔
320
    }
1,494✔
321

322
    ~DecompressInputStreamLibCompression()
323
    {
1,494✔
324
        compression_stream_destroy(&m_strm);
1,494✔
325
    }
1,494✔
326

327
    Span<const char> next_block() override
328
    {
4,218✔
329
        m_buffer.resize(m_buffer.capacity());
4,218✔
330
        m_strm.dst_size = m_buffer.size();
4,218✔
331
        m_strm.dst_ptr = to_bytef(m_buffer.data());
4,218✔
332

333
        while (true) {
5,700✔
334
            // We may have some leftover input buffer from a previous call if the
335
            // inflated result didn't fit in the output buffer. If not, we need to
336
            // fetch the next block.
337
            bool end = false;
5,700✔
338
            if (m_strm.src_size == 0) {
5,700✔
339
                if (auto block = m_source.next_block(); block.size()) {
4,110✔
340
                    m_strm.src_ptr = to_bytef(block.data());
1,509✔
341
                    m_strm.src_size = block.size();
1,509✔
342
                }
1,509✔
343
                else {
2,601✔
344
                    end = true;
2,601✔
345
                }
2,601✔
346
            }
4,110✔
347

348
            auto rc = compression_stream_process(&m_strm, end ? COMPRESSION_STREAM_FINALIZE : 0);
5,700✔
349
            if (rc == COMPRESSION_STATUS_ERROR)
5,700✔
350
                throw std::system_error(compression::error::corrupt_input);
351
            auto bytes_written = m_buffer.size() - m_strm.dst_size;
5,700✔
352
            if (bytes_written) {
5,700✔
353
                // We got some output, so return that. We might also have reached
354
                // the end of the stream, which'll be reported on the next call
355
                // if so.
356
                m_buffer.resize(bytes_written);
2,724✔
357
                return m_buffer;
2,724✔
358
            }
2,724✔
359
            if (rc == COMPRESSION_STATUS_END) {
2,976✔
360
                // We reached the end of the stream and are done
361
                return {nullptr, nullptr};
1,494✔
362
            }
1,494✔
363

364
            if (end) {
1,482✔
365
                // We ran out of input data but didn't get COMPRESSION_STATUS_END,
366
                // so the input is truncated
367
                throw std::system_error(compression::error::corrupt_input);
368
            }
369

370
            // Otherwise we produced no output but also didn't reach the end of the
371
            // stream, so we need to feed more data in.
372
        }
1,482✔
373
    }
4,218✔
374

375
private:
376
    InputStream& m_source;
377
    compression_stream m_strm = {};
378
    AppendBuffer<char> m_buffer;
379
};
380

381
API_AVAILABLE_END
382
#endif
383

384
std::error_code decompress_none(InputStream& compressed, Span<const char> compressed_buf, Span<char> decompressed_buf)
385
{
58,611✔
386
    do {
58,611✔
387
        auto count = std::min(decompressed_buf.size(), compressed_buf.size());
58,611✔
388
        std::memcpy(decompressed_buf.data(), compressed_buf.data(), count);
58,611✔
389
        decompressed_buf = decompressed_buf.sub_span(count);
58,611✔
390
        compressed_buf = compressed.next_block();
58,611✔
391
    } while (compressed_buf.size() && decompressed_buf.size());
58,611!
392

393
    if (compressed_buf.size() || decompressed_buf.size()) {
58,611✔
394
        return compression::error::incorrect_decompressed_size;
×
395
    }
×
396
    return std::error_code{};
58,611✔
397
}
58,611✔
398

399
std::error_code decompress_zlib(InputStream& compressed, Span<const char> compressed_buf, Span<char> decompressed_buf,
400
                                bool has_header)
401
{
13,458✔
402
    using namespace compression;
13,458✔
403

404
    z_stream strm = {};
13,458✔
405
    int rc = inflateInit(&strm);
13,458✔
406
    if (rc != Z_OK)
13,458✔
407
        return error::decompress_error;
×
408
    util::ScopeExit cleanup([&]() noexcept {
13,458✔
409
        // inflateEnd() only fails if we modified the fields of strm in an invalid way
410
        int rc = inflateEnd(&strm);
13,458✔
411
        REALM_ASSERT(rc == Z_OK);
13,458✔
412
        static_cast<void>(rc);
13,458✔
413
    });
13,458✔
414

415
    if (!has_header)
13,458✔
416
        inflate_zlib_header(strm);
5,748✔
417

418
    do {
14,232✔
419
        size_t in_offset = 0;
14,232✔
420

421
        // This loop will typically run exactly once. If size_t is larger than
422
        // uInt (as it is on most 64-bit platforms), input or output larger than
423
        // uInt's upper bound will require multiple iterations of passing data
424
        // to zlib.
425
        while (in_offset < compressed_buf.size()) {
15,012✔
426
            strm.avail_in = bounded_avail(compressed_buf.size() - in_offset);
14,235✔
427
            strm.next_in = to_bytef(compressed_buf.data() + in_offset);
14,235✔
428
            strm.next_out = to_bytef(decompressed_buf.data());
14,235✔
429
            strm.avail_out = bounded_avail(decompressed_buf.size());
14,235✔
430
            strm.total_in = 0;
14,235✔
431
            strm.total_out = 0;
14,235✔
432

433
            int rc = inflate(&strm, Z_SYNC_FLUSH);
14,235✔
434
            REALM_ASSERT(rc != Z_STREAM_ERROR && rc != Z_MEM_ERROR);
14,235✔
435
            in_offset += strm.total_in;
14,235✔
436
            decompressed_buf = decompressed_buf.sub_span(strm.total_out);
14,235✔
437

438
            if (rc == Z_OK) {
14,235✔
439
                // We made forward progress but did not reach the end
440
                continue;
780✔
441
            }
780✔
442
            if (rc == Z_STREAM_END) {
13,455✔
443
                // If we got Z_STREAM_END and there's leftover input then the
444
                // data is invalid
445
                if (strm.avail_in || in_offset < compressed_buf.size() || compressed.next_block().size())
13,449✔
446
                    return error::corrupt_input;
3✔
447
                if (decompressed_buf.size() != 0)
13,446✔
448
                    return error::incorrect_decompressed_size;
3✔
449
                return std::error_code{};
13,443✔
450
            }
13,446✔
451
            if (rc == Z_NEED_DICT) {
6✔
452
                // We don't support custom dictionaries
453
                return error::decompress_unsupported;
×
454
            }
×
455
            if (rc == Z_DATA_ERROR) {
6✔
456
                return error::corrupt_input;
3✔
457
            }
3✔
458
            if (rc == Z_BUF_ERROR) {
3✔
459
                if (strm.avail_out == 0) {
3✔
460
                    if (decompressed_buf.size() > 0) {
3✔
461
                        // We need to pass in the next range of the decompress buffer
462
                        continue;
×
463
                    }
×
464
                    // We should never run out of output buffer space unless the
465
                    // decompressed size was wrong.
466
                    return error::incorrect_decompressed_size;
3✔
467
                }
3✔
468
                // If there's space left in the output buffer then that means
469
                // we ran out of input without getting Z_STREAM_END
470
                return error::corrupt_input;
×
471
            }
3✔
472

473
            // Unknown error code
474
            REALM_UNREACHABLE();
475
        }
×
476
    } while ((compressed_buf = compressed.next_block()), compressed_buf.size());
14,232✔
477

478
    if (strm.avail_in && !strm.avail_out) {
3!
479
        // Ran out of output buffer with remaining input
480
        return error::incorrect_decompressed_size;
×
481
    }
×
482

483
    // We ran out of input without getting Z_STREAM_END
484
    return error::corrupt_input;
3✔
485
}
3✔
486

487
#if REALM_USE_LIBCOMPRESSION
488
API_AVAILABLE_BEGIN(macos(10.11))
489
std::error_code decompress_libcompression(InputStream& compressed, Span<const char> compressed_buf,
490
                                          Span<char> decompressed_buf, Algorithm algorithm, bool has_header)
491
{
14,502✔
492
    using namespace compression;
14,502✔
493

494
    // If we're given a buffer with a zlib header we have to parse it outselves,
495
    // as libcompression doesn't handle it.
496
    if (has_header) {
14,502✔
497
        // The first nibble is compression algorithm (where 8 is DEFLATE), and second
498
        // nibble is window size. RFC 1950 only allows window size 7, so the first
499
        // byte must be 0x78.
500
        if (read_byte(compressed, compressed_buf) != 0x78)
8,034✔
501
            return error::corrupt_input;
502
        // The second byte has flags. Bit 5 is the only interesting one, which
503
        // indicates if a custom dictionary was used. We don't support that.
504
        uint8_t flags = read_byte(compressed, compressed_buf);
8,034✔
505
        if (flags & 0b100000)
8,034✔
506
            return error::decompress_unsupported;
507
        algorithm = Algorithm::Deflate;
8,034✔
508
    }
8,034✔
509

510
    auto compression_algorithm = algorithm_to_compression_algorithm(algorithm);
14,502✔
511
    if (!compression_algorithm)
14,502✔
512
        return error::decompress_unsupported;
3✔
513

514
    compression_stream strm;
14,499✔
515
    auto rc = compression_stream_init(&strm, COMPRESSION_STREAM_DECODE, compression_algorithm);
14,499✔
516
    if (rc != COMPRESSION_STATUS_OK)
14,499✔
517
        return error::decompress_error;
518

519
    // Using ScopeExit here warns about missing availability checking, but also
520
    // complains about redundant availability checking if it's added.
521
    struct Cleanup {
14,499✔
522
        compression_stream* strm;
14,499✔
523
        ~Cleanup()
14,499✔
524
        {
14,499✔
525
            compression_stream_destroy(strm);
14,499✔
526
        }
14,499✔
527
    } cleanup{&strm};
14,499✔
528

529
    strm.dst_size = decompressed_buf.size();
14,499✔
530
    strm.dst_ptr = to_bytef(decompressed_buf.data());
14,499✔
531

532
    uint32_t expected_checksum = 0;
14,499✔
533
    uLong actual_checksum = 1;
14,499✔
534
    do {
15,270✔
535
        strm.src_size = compressed_buf.size();
15,270✔
536
        strm.src_ptr = to_bytef(compressed_buf.data());
15,270✔
537

538
        // compression_stream_process() only writes 64 KB at a time, and you
539
        // have to call it in a loop until it stops giving more output before
540
        // feeding in more input
541
        while (rc != COMPRESSION_STATUS_END) {
44,607✔
542
            auto dst_ptr_start = strm.dst_ptr;
30,084✔
543
            rc = compression_stream_process(&strm, 0);
30,084✔
544
            if (rc == COMPRESSION_STATUS_ERROR)
30,084✔
545
                return error::corrupt_input;
546
            if (strm.dst_ptr == dst_ptr_start)
30,084✔
547
                break;
747✔
548

549
            // libcompression doesn't check the checksum, so do it manually.
550
            // This loop will never actually run multiple times as in practice
551
            // libcompression doesn't actually write more bytes than fit in uLong
552
            // in a single call to compression_stream_process()
553
            while (dst_ptr_start < strm.dst_ptr) {
58,674✔
554
                auto size = bounded_avail(strm.dst_ptr - dst_ptr_start);
29,337✔
555
                actual_checksum = adler32(actual_checksum, dst_ptr_start, size);
29,337✔
556
                dst_ptr_start += size;
29,337✔
557
            }
29,337✔
558
        }
29,337✔
559

560
        // The checksum at the end can potentially be straddling a block boundary
561
        // and we can't rewind, so maintain a rolling window of the last four
562
        // bytes seen.
563
        for (uint8_t byte : compressed_buf.last(std::min<size_t>(4u, compressed_buf.size()))) {
59,595✔
564
            expected_checksum <<= 8;
59,595✔
565
            expected_checksum += byte;
59,595✔
566
        }
59,595✔
567
    } while ((compressed_buf = compressed.next_block()), compressed_buf.size());
15,270✔
568
    rc = compression_stream_process(&strm, COMPRESSION_STREAM_FINALIZE);
14,499✔
569
    if (rc != COMPRESSION_STATUS_END)
14,499✔
570
        return error::corrupt_input;
6✔
571
    if (strm.dst_size != 0)
14,493✔
572
        return error::incorrect_decompressed_size;
3✔
573
    if (expected_checksum != actual_checksum)
14,490✔
574
        return error::corrupt_input;
6✔
575
    // Check for remaining extra input
576
    if (strm.src_size || compressed.next_block().size())
14,484✔
577
        return error::corrupt_input;
578
    return std::error_code{};
14,484✔
579
}
14,484✔
580
API_AVAILABLE_END
581
#endif
582

583
std::error_code decompress(InputStream& compressed, Span<const char> compressed_buf, Span<char> decompressed_buf,
584
                           Algorithm algorithm, bool has_header)
585
{
86,583✔
586
    using namespace compression;
86,583✔
587

588
    if (decompressed_buf.size() == 0) {
86,583✔
589
        return std::error_code{};
12✔
590
    }
12✔
591
    if (!compressed_buf.size()) {
86,571✔
592
        return error::incorrect_decompressed_size;
×
593
    }
×
594

595
#if REALM_USE_LIBCOMPRESSION
43,347✔
596
    if (algorithm != Algorithm::None)
43,347✔
597
        return decompress_libcompression(compressed, compressed_buf, decompressed_buf, algorithm, has_header);
14,502✔
598
#endif
28,845✔
599

600
    switch (algorithm) {
72,069✔
601
        case Algorithm::None:
58,608✔
602
            return decompress_none(compressed, compressed_buf, decompressed_buf);
58,608✔
603
        case Algorithm::Deflate:
13,458✔
604
            return decompress_zlib(compressed, compressed_buf, decompressed_buf, has_header);
13,458✔
605
        default:
3✔
606
            return error::decompress_unsupported;
3✔
607
    }
72,069✔
608
}
72,069✔
609

610
#if 0
// Disabled instrumentation: collects, per uncompressed size, how many buffers
// were compressed and their total compressed size, and prints a summary to
// stderr when the static object is destroyed.
struct CompressionStats {
    std::mutex mutex;
    // uncompressed size -> (number of buffers, sum of compressed sizes)
    std::map<size_t, std::pair<size_t, size_t>> stats;

    ~CompressionStats()
    {
        std::lock_guard lock(mutex);
        size_t total_uncompressed = 0;
        size_t total_compressed = 0;
        for (auto& [size, results] : stats) {
            const double percent = static_cast<double>(results.second) / results.first / size * 100;
            fprintf(stderr, "%zu: %zu %g\n", size, results.first, percent);
            total_uncompressed += size * results.first;
            total_compressed += results.second;
        }
        const double total_percent = (double)total_compressed / total_uncompressed * 100.0;
        fprintf(stderr, "total: %zu -> %zu (%g%%)\n", total_uncompressed, total_compressed, total_percent);
    }
} s_compression_stats;

void record_compression_result(size_t uncompressed, size_t compressed)
{
    std::lock_guard lock(s_compression_stats.mutex);
    auto& entry = s_compression_stats.stats[uncompressed];
    ++entry.first;
    entry.second += compressed;
}
#else
// Statistics are compiled out: recording a result does nothing.
void record_compression_result(size_t, size_t) {}
#endif
638

639
#if REALM_USE_LIBCOMPRESSION
640
API_AVAILABLE_BEGIN(macos(10.11))
641
std::error_code compress_lzfse(Span<const char> uncompressed_buf, Span<char> compressed_buf,
642
                               std::size_t& compressed_size, compression::Alloc* custom_allocator)
643
{
33,021✔
644
    using namespace compression;
33,021✔
645
    if (compressed_buf.size() < 4)
33,021✔
646
        return error::compress_buffer_too_small;
647
    // compression_encode_buffer() takes a size_t, but crashes if the value is
648
    // larger than 2^31. Using the stream API works, but it's slower for
649
    // normal-sized input, and we can just fall back to zlib for this edge case.
650
    if (uncompressed_buf.size() > std::numeric_limits<int32_t>::max())
33,021✔
651
        return error::compress_input_too_long;
652

653
    auto uncompressed_ptr = to_bytef(uncompressed_buf.data());
33,021✔
654
    auto uncompressed_size = uncompressed_buf.size();
33,021✔
655
    auto compressed_ptr = to_bytef(compressed_buf.data());
33,021✔
656
    auto compressed_buf_size = compressed_buf.size() - 4;
33,021✔
657

658
    void* scratch_buffer = nullptr;
33,021✔
659
    if (custom_allocator) {
33,021✔
660
        scratch_buffer = custom_allocator->alloc(compression_encode_scratch_buffer_size(COMPRESSION_LZFSE));
33,021✔
661
        if (!scratch_buffer)
33,021✔
662
            return error::out_of_memory;
21,624✔
663
    }
33,021✔
664

665
    size_t bytes = compression_encode_buffer(compressed_ptr, compressed_buf_size, uncompressed_ptr, uncompressed_size,
11,397✔
666
                                             scratch_buffer, COMPRESSION_LZFSE);
11,397✔
667
    if (bytes == 0)
11,397✔
668
        return error::compress_buffer_too_small;
12✔
669

670
    // Calculate the checksum and append it to the end of the stream
671
    uLong checksum = htonl(adler32(1, uncompressed_ptr, static_cast<uInt>(uncompressed_size)));
11,385✔
672
    for (int i = 0; i < 4; ++i) {
56,925✔
673
        compressed_buf[bytes + i] = checksum & 0xFF;
45,540✔
674
        checksum >>= 8;
45,540✔
675
    }
45,540✔
676
    compressed_size = bytes + 4;
11,385✔
677
    return std::error_code{};
11,385✔
678
}
11,397✔
679
API_AVAILABLE_END
680
#endif
681

682
std::error_code compress_lzfse_or_zlib(Span<const char> uncompressed_buf, Span<char> compressed_buf,
683
                                       std::size_t& compressed_size, int compression_level,
684
                                       compression::Alloc* custom_allocator)
685
{
43,374✔
686
    using namespace compression;
43,374✔
687
#if REALM_USE_LIBCOMPRESSION
33,021✔
688
    {
33,021✔
689
        size_t len = write_header({Algorithm::Lzfse, uncompressed_buf.size()}, compressed_buf);
33,021✔
690
        auto ec = compress_lzfse(uncompressed_buf, compressed_buf.sub_span(len), compressed_size, custom_allocator);
33,021✔
691
        if (ec != error::compress_input_too_long)
33,021✔
692
            return ec;
33,021✔
693
    }
33,021✔
694
#endif
695
    size_t len = header_width(uncompressed_buf.size());
10,353✔
696
    REALM_ASSERT(len >= 2);
10,353✔
697
    auto ec = compress(uncompressed_buf, compressed_buf.sub_span(len - 2), compressed_size, compression_level,
10,353✔
698
                       custom_allocator);
10,353✔
699
    if (!ec) {
10,353✔
700
        // Note: overwrites zlib header
701
        write_header({Algorithm::Deflate, uncompressed_buf.size()}, compressed_buf);
10,341✔
702
        compressed_size -= 2;
10,341✔
703
    }
10,341✔
704
    return ec;
10,353✔
705
}
43,374✔
706
} // unnamed namespace
707

708

709
const std::error_category& compression::error_category() noexcept
710
{
×
711
    return g_error_category;
×
712
}
×
713

714
// Wrap a compression::error value in a std::error_code bound to the
// compression error category.
std::error_code compression::make_error_code(error error_code) noexcept
{
    const int raw_value = static_cast<int>(error_code);
    return {raw_value, g_error_category};
}
219,354✔
718

719

720
// zlib compression level: 1-9, 1 fastest.
721

722
// zlib deflateBound()
723
std::size_t compression::compress_bound(std::size_t size) noexcept
724
{
96✔
725
    // DEFLATE's worst-case size is a 6 byte zlib header, plus the uncompressed
726
    // data, plus a 5 byte header for every 16383 byte block.
727
    size_t overhead = 6 + 5 * (size / 16383 + 1);
96✔
728
    if (std::numeric_limits<size_t>::max() - overhead < size)
96✔
729
        return 0;
×
730
    return size + overhead;
96✔
731
}
96✔
732

733

734
// zlib deflate()
735
std::error_code compression::compress(Span<const char> uncompressed_buf, Span<char> compressed_buf,
736
                                      std::size_t& compressed_size, int compression_level, Alloc* custom_allocator)
737
{
26,973✔
738
    auto uncompressed_ptr = to_bytef(uncompressed_buf.data());
26,973✔
739
    auto uncompressed_size = uncompressed_buf.size();
26,973✔
740
    auto compressed_ptr = to_bytef(compressed_buf.data());
26,973✔
741
    auto compressed_buf_size = compressed_buf.size();
26,973✔
742

743
    z_stream strm = {};
26,973✔
744
    if (custom_allocator) {
26,973✔
745
        strm.opaque = custom_allocator;
26,871✔
746
        strm.zalloc = &custom_alloc;
26,871✔
747
        strm.zfree = &custom_free;
26,871✔
748
    }
26,871✔
749

750
    int rc = deflateInit(&strm, compression_level);
26,973✔
751
    if (rc == Z_MEM_ERROR)
26,973✔
752
        return error::out_of_memory;
×
753

754
    if (rc != Z_OK)
26,973✔
755
        return error::compress_error;
×
756

757
    strm.next_in = uncompressed_ptr;
26,973✔
758
    strm.avail_in = 0;
26,973✔
759
    strm.next_out = compressed_ptr;
26,973✔
760
    strm.avail_out = 0;
26,973✔
761

762
    std::size_t next_in_ndx = 0;
26,973✔
763
    std::size_t next_out_ndx = 0;
26,973✔
764
    REALM_ASSERT(rc == Z_OK);
26,973✔
765
    while (rc == Z_OK || rc == Z_BUF_ERROR) {
56,874✔
766
        REALM_ASSERT(strm.next_in + strm.avail_in == uncompressed_ptr + next_in_ndx);
32,829✔
767
        REALM_ASSERT(strm.next_out + strm.avail_out == compressed_ptr + next_out_ndx);
32,829✔
768

769
        bool stream_updated = false;
32,829✔
770

771
        if (strm.avail_in == 0 && next_in_ndx < uncompressed_size) {
32,829✔
772
            auto in_size = bounded_avail(uncompressed_size - next_in_ndx);
26,961✔
773
            next_in_ndx += in_size;
26,961✔
774
            strm.avail_in = uInt(in_size);
26,961✔
775
            stream_updated = true;
26,961✔
776
        }
26,961✔
777

778
        if (strm.avail_out == 0 && next_out_ndx < compressed_buf_size) {
32,829✔
779
            auto out_size = bounded_avail(compressed_buf_size - next_out_ndx);
26,973✔
780
            next_out_ndx += out_size;
26,973✔
781
            strm.avail_out = uInt(out_size);
26,973✔
782
            stream_updated = true;
26,973✔
783
        }
26,973✔
784

785
        if (rc == Z_BUF_ERROR && !stream_updated) {
32,829✔
786
            deflateEnd(&strm);
2,928✔
787
            return error::compress_buffer_too_small;
2,928✔
788
        }
2,928✔
789

790
        int flush = (next_in_ndx == uncompressed_size) ? Z_FINISH : Z_NO_FLUSH;
29,901✔
791

792
        rc = deflate(&strm, flush);
29,901✔
793
        REALM_ASSERT(rc != Z_STREAM_END || flush == Z_FINISH);
29,901✔
794
    }
29,901✔
795

796
    if (rc != Z_STREAM_END) {
24,045✔
797
        deflateEnd(&strm);
×
798
        return error::compress_error;
×
799
    }
×
800

801
    compressed_size = next_out_ndx - strm.avail_out;
24,045✔
802

803
    rc = deflateEnd(&strm);
24,045✔
804
    if (rc != Z_OK)
24,045✔
805
        return error::compress_error;
×
806

807
    return std::error_code{};
24,045✔
808
}
24,045✔
809

810
std::error_code compression::decompress(InputStream& compressed, Span<char> decompressed_buf)
811
{
120✔
812
    return ::decompress(compressed, compressed.next_block(), decompressed_buf, Algorithm::Deflate, true);
120✔
813
}
120✔
814

815
std::error_code compression::decompress(Span<const char> compressed_buf, Span<char> decompressed_buf)
816
{
15,636✔
817
    SimpleInputStream adapter(compressed_buf);
15,636✔
818
    return ::decompress(adapter, adapter.next_block(), decompressed_buf, Algorithm::Deflate, true);
15,636✔
819
}
15,636✔
820

821
std::error_code compression::decompress_nonportable(InputStream& compressed, AppendBuffer<char>& decompressed)
822
{
72,132✔
823
    auto compressed_buf = compressed.next_block();
72,132✔
824
    auto header = read_header(compressed, compressed_buf);
72,132✔
825
    if (header.size == std::numeric_limits<size_t>::max())
72,132✔
826
        return error::out_of_memory;
6✔
827
    decompressed.resize(header.size);
72,126✔
828
    if (header.size == 0)
72,126✔
829
        return std::error_code{};
1,296✔
830
    return ::decompress(compressed, compressed_buf, decompressed, header.algorithm, false);
70,830✔
831
}
72,126✔
832

833
std::error_code compression::allocate_and_compress(CompressMemoryArena& compress_memory_arena,
834
                                                   Span<const char> uncompressed_buf,
835
                                                   std::vector<char>& compressed_buf)
836
{
13,608✔
837
    const int compression_level = 1;
13,608✔
838
    std::size_t compressed_size = 0;
13,608✔
839

840
    if (compressed_buf.size() < 256)
13,608✔
841
        compressed_buf.resize(256); // Throws
12,672✔
842

843
    for (;;) {
16,518✔
844
        init_arena(compress_memory_arena);
16,518✔
845
        std::error_code ec = compression::compress(uncompressed_buf, compressed_buf, compressed_size,
16,518✔
846
                                                   compression_level, &compress_memory_arena);
16,518✔
847

848
        if (REALM_UNLIKELY(ec)) {
16,518✔
849
            if (ec == compression::error::compress_buffer_too_small) {
2,910✔
850
                std::size_t n = compressed_buf.size();
2,910✔
851
                REALM_ASSERT(n != std::numeric_limits<std::size_t>::max());
2,910✔
852
                if (util::int_multiply_with_overflow_detect(n, 2))
2,910✔
853
                    n = std::numeric_limits<std::size_t>::max();
×
854
                compressed_buf.resize(n); // Throws
2,910✔
855
                continue;
2,910✔
856
            }
2,910✔
857
            if (ec == compression::error::out_of_memory) {
×
858
                grow_arena(compress_memory_arena); // Throws
×
859
                continue;
×
860
            }
×
861
            return ec;
×
862
        }
×
863
        break;
13,608✔
864
    }
16,518✔
865
    compressed_buf.resize(compressed_size);
13,608✔
866
    return std::error_code{};
13,608✔
867
}
13,608✔
868

869
void compression::allocate_and_compress_nonportable(CompressMemoryArena& arena, Span<const char> uncompressed,
870
                                                    util::AppendBuffer<char>& compressed)
871
{
215,511✔
872
    if (uncompressed.size() == 0) {
215,511✔
873
        compressed.resize(0);
71,865✔
874
        return;
71,865✔
875
    }
71,865✔
876

877
    size_t header_size = header_width(uncompressed.size());
143,646✔
878
    compressed.resize(uncompressed.size() + header_size);
143,646✔
879
    size_t compressed_size = 0;
143,646✔
880
    // zlib is ineffective for very small sizes. Measured results indicate that
881
    // it only manages to compress at all past 100 bytes and the compression
882
    // ratio becomes interesting around 200 bytes.
883
    while (uncompressed.size() > 256) {
165,270✔
884
        init_arena(arena);
43,374✔
885
        const int compression_level = 1;
43,374✔
886
        auto ec = compress_lzfse_or_zlib(uncompressed, compressed, compressed_size, compression_level, &arena);
43,374✔
887
        if (ec == error::compress_buffer_too_small) {
43,374✔
888
            // Compressed result was larger than uncompressed, so just store the
889
            // uncompressed
890
            compressed_size = 0;
24✔
891
            break;
24✔
892
        }
24✔
893
        if (ec == compression::error::out_of_memory) {
43,350✔
894
            grow_arena(arena); // Throws
21,624✔
895
            continue;
21,624✔
896
        }
21,624✔
897
        if (ec) {
21,726✔
898
            throw std::system_error(ec);
×
899
        }
×
900
        REALM_ASSERT(compressed_size);
21,726✔
901
        compressed_size += header_size;
21,726✔
902
        record_compression_result(uncompressed.size(), compressed_size);
21,726✔
903
        compressed.resize(compressed_size);
21,726✔
904
        return;
21,726✔
905
    }
21,726✔
906

907
    // If compression made it grow or it was too small to compress then copy
908
    // the source over uncompressed
909
    if (!compressed_size) {
121,932✔
910
        record_compression_result(uncompressed.size(), uncompressed.size() + header_size);
121,932✔
911
        write_header({Algorithm::None, uncompressed.size()}, compressed);
121,932✔
912
        std::memcpy(compressed.data() + header_size, uncompressed.data(), uncompressed.size());
121,932✔
913
    }
121,932✔
914
}
121,920✔
915

916
// Convenience overload: compresses `uncompressed_buf` in the nonportable
// format using a memory arena local to this call, and returns the result in a
// freshly created buffer.
util::AppendBuffer<char> compression::allocate_and_compress_nonportable(Span<const char> uncompressed_buf)
{
    util::compression::CompressMemoryArena scratch_arena;
    util::AppendBuffer<char> result;
    allocate_and_compress_nonportable(scratch_arena, uncompressed_buf, result);
    return result;
}
196,767✔
923

924
// Create an input stream that yields the decompressed contents of `source`,
// which must hold data in the nonportable (header-prefixed) format.
//
// On success `total_size` is set to the decompressed size taken from the
// header. Returns nullptr (leaving `total_size` untouched) if the header size
// is the maximum size_t value, and nullptr (with `total_size` already set) if
// no decompressor is available for the header's algorithm.
std::unique_ptr<InputStream> compression::decompress_nonportable_input_stream(InputStream& source, size_t& total_size)
{
    auto first_block = source.next_block();
    const auto header = read_header(source, first_block);
    if (header.size == std::numeric_limits<size_t>::max())
        return nullptr;
    total_size = header.size;

    const auto algorithm = header.algorithm;
    if (algorithm == Algorithm::None) {
        return std::make_unique<DecompressInputStreamNone>(source, first_block);
    }
#if REALM_USE_LIBCOMPRESSION
    // With libcompression available it handles both compressed formats.
    if (algorithm == Algorithm::Deflate || algorithm == Algorithm::Lzfse) {
        return std::make_unique<DecompressInputStreamLibCompression>(source, first_block, header);
    }
#endif
    if (algorithm == Algorithm::Deflate) {
        return std::make_unique<DecompressInputStreamZlib>(source, first_block, total_size);
    }
    return nullptr;
}
1,500✔
942

943
// Read the nonportable header from the start of `source` and return the size
// recorded in it. Consumes the first block of the stream.
size_t compression::get_uncompressed_size_from_header(InputStream& source)
{
    auto first_block = source.next_block();
    const auto header = read_header(source, first_block);
    return header.size;
}
175,026✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc