• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / jorgen.edelbo_385

12 Aug 2024 01:14PM UTC coverage: 91.1% (-0.007%) from 91.107%
jorgen.edelbo_385

Pull #7826

Evergreen

jedelbo
Merge tag 'v14.12.0' into next-major
Pull Request #7826: Merge Next major

103514 of 182200 branches covered (56.81%)

3132 of 3493 new or added lines in 52 files covered. (89.67%)

154 existing lines in 17 files now uncovered.

219973 of 241462 relevant lines covered (91.1%)

6545726.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.5
/src/realm/integer_compressor.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2023 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <realm/integer_compressor.hpp>
20
#include <realm/array.hpp>
21
#include <realm/integer_flex_compressor.hpp>
22
#include <realm/integer_packed_compressor.hpp>
23
#include <realm/array_with_find.hpp>
24
#include <realm/query_conditions.hpp>
25

26
#include <vector>
27
#include <algorithm>
28

29
using namespace realm;
30

31
namespace {
32

33
template <typename T, typename... Arg>
34
inline void init_compress_array(Array& arr, size_t byte_size, Arg&&... args)
35
{
112,833✔
36
    Allocator& allocator = arr.get_alloc();
112,833✔
37
    auto mem = allocator.alloc(byte_size);
112,833✔
38
    auto h = mem.get_addr();
112,833✔
39
    T::init_header(h, std::forward<Arg>(args)...);
112,833✔
40
    NodeHeader::set_capacity_in_header(byte_size, h);
112,833✔
41
    arr.init_from_mem(mem);
112,833✔
42
}
112,833✔
43

44
} // namespace
45

46
bool IntegerCompressor::always_compress(const Array& origin, Array& arr, NodeHeader::Encoding encoding) const
NEW
47
{
×
NEW
48
    using Encoding = NodeHeader::Encoding;
×
NEW
49
    std::vector<int64_t> values;
×
NEW
50
    std::vector<unsigned> indices;
×
NEW
51
    compress_values(origin, values, indices);
×
NEW
52
    if (!values.empty()) {
×
NEW
53
        const uint8_t flags = NodeHeader::get_flags(origin.get_header());
×
NEW
54
        uint8_t v_width = std::max(Node::signed_to_num_bits(values.front()), Node::signed_to_num_bits(values.back()));
×
55

NEW
56
        if (encoding == Encoding::Packed) {
×
NEW
57
            const auto packed_size = NodeHeader::calc_size(indices.size(), v_width, NodeHeader::Encoding::Packed);
×
NEW
58
            init_compress_array<PackedCompressor>(arr, packed_size, flags, v_width, origin.size());
×
NEW
59
            PackedCompressor::copy_data(origin, arr);
×
NEW
60
        }
×
NEW
61
        else if (encoding == Encoding::Flex) {
×
NEW
62
            uint8_t ndx_width = NodeHeader::unsigned_to_num_bits(values.size());
×
NEW
63
            const auto flex_size = NodeHeader::calc_size(values.size(), indices.size(), v_width, ndx_width);
×
NEW
64
            init_compress_array<FlexCompressor>(arr, flex_size, flags, v_width, ndx_width, values.size(),
×
NEW
65
                                                indices.size());
×
NEW
66
            FlexCompressor::copy_data(arr, values, indices);
×
NEW
67
        }
×
NEW
68
        else {
×
69
            REALM_UNREACHABLE();
NEW
70
        }
×
NEW
71
        return true;
×
NEW
72
    }
×
NEW
73
    return false;
×
NEW
74
}
×
75

76
bool IntegerCompressor::compress(const Array& origin, Array& arr) const
77
{
879,087✔
78
    if (origin.m_width < 2 || origin.m_size == 0)
879,087✔
79
        return false;
320,166✔
80

81
#if REALM_COMPRESS
82
    return always_compress(origin, arr, NodeHeader::Encoding::Flex);
83
#else
84
    std::vector<int64_t> values;
558,921✔
85
    std::vector<unsigned> indices;
558,921✔
86
    compress_values(origin, values, indices);
558,921✔
87
    REALM_ASSERT(!values.empty());
558,921✔
88
    const auto uncompressed_size = origin.get_byte_size();
558,921✔
89
    uint8_t ndx_width = NodeHeader::unsigned_to_num_bits(values.size());
558,921✔
90
    uint8_t v_width = std::max(Node::signed_to_num_bits(values.front()), Node::signed_to_num_bits(values.back()));
558,921✔
91
    const auto packed_size = NodeHeader::calc_size(indices.size(), v_width, NodeHeader::Encoding::Packed);
558,921✔
92
    const auto flex_size = NodeHeader::calc_size(values.size(), indices.size(), v_width, ndx_width);
558,921✔
93
    // heuristic: only compress to packed if gain at least 11.1%
94
    const auto adjusted_packed_size = packed_size + packed_size / 8;
558,921✔
95
    // heuristic: only compress to flex if gain at least 20%
96
    const auto adjusted_flex_size = flex_size + flex_size / 4;
558,921✔
97
    if (adjusted_flex_size < adjusted_packed_size && adjusted_flex_size < uncompressed_size) {
558,921✔
98
        const uint8_t flags = NodeHeader::get_flags(origin.get_header());
32,793✔
99
        init_compress_array<FlexCompressor>(arr, flex_size, flags, v_width, ndx_width, values.size(), indices.size());
32,793✔
100
        FlexCompressor::copy_data(arr, values, indices);
32,793✔
101
        return true;
32,793✔
102
    }
32,793✔
103
    else if (adjusted_packed_size < uncompressed_size) {
526,128✔
104
        const uint8_t flags = NodeHeader::get_flags(origin.get_header());
80,040✔
105
        init_compress_array<PackedCompressor>(arr, packed_size, flags, v_width, origin.size());
80,040✔
106
        PackedCompressor::copy_data(origin, arr);
80,040✔
107
        return true;
80,040✔
108
    }
80,040✔
109
    return false;
446,088✔
110
#endif
558,921✔
111
}
558,921✔
112

113
bool IntegerCompressor::decompress(Array& arr) const
114
{
103,140✔
115
    int64_t min_v = std::numeric_limits<int64_t>::max();
103,140✔
116
    int64_t max_v = std::numeric_limits<int64_t>::min();
103,140✔
117
    REALM_ASSERT_DEBUG(arr.is_attached());
103,140✔
118
    auto values_fetcher = [&]() {
103,140✔
119
        const auto sz = arr.size();
103,140✔
120
        if (is_packed()) {
103,140✔
121
            std::vector<int64_t> res;
71,274✔
122
            res.reserve(sz);
71,274✔
123
            for (size_t i = 0; i < sz; ++i) {
11,409,879✔
124
                auto val = arr.get(i);
11,338,605✔
125
                if (val > max_v)
11,338,605✔
126
                    max_v = val;
5,569,764✔
127
                if (val < min_v)
11,338,605✔
128
                    min_v = val;
920,952✔
129
                res.push_back(val);
11,338,605✔
130
            }
11,338,605✔
131
            return res;
71,274✔
132
        }
71,274✔
133
        min_v = FlexCompressor::min(*this);
31,866✔
134
        max_v = FlexCompressor::max(*this);
31,866✔
135
        return FlexCompressor::get_all(*this, 0, sz);
31,866✔
136
    };
103,140✔
137
    const auto& values = values_fetcher();
103,140✔
138
    //  do the reverse of compressing the array
139
    REALM_ASSERT_DEBUG(!values.empty());
103,140✔
140
    using Encoding = NodeHeader::Encoding;
103,140✔
141
    const auto flags = NodeHeader::get_flags(arr.get_header());
103,140✔
142
    const auto size = values.size();
103,140✔
143
    const auto width = std::max(Array::bit_width(min_v), Array::bit_width(max_v));
103,140✔
144
    REALM_ASSERT_DEBUG(width == 0 || width == 1 || width == 2 || width == 4 || width == 8 || width == 16 ||
103,140✔
145
                       width == 32 || width == 64);
103,140✔
146
    // 64 is some slab allocator magic number.
147
    // The padding is needed in order to account for bit width expansion.
148
    const auto byte_size = 64 + NodeHeader::calc_size(size, width, Encoding::WTypBits);
103,140✔
149
    REALM_ASSERT_DEBUG(byte_size % 8 == 0); // nevertheless all the values my be aligned to 8
103,140✔
150

151
    // Create new array with the correct width
152
    const auto mem = arr.get_alloc().alloc(byte_size);
103,140✔
153
    const auto header = mem.get_addr();
103,140✔
154
    init_header(header, Encoding::WTypBits, flags, width, size);
103,140✔
155
    NodeHeader::set_capacity_in_header(byte_size, header);
103,140✔
156

157
    // Destroy old array before initializing
158
    arr.destroy();
103,140✔
159
    arr.init_from_mem(mem);
103,140✔
160

161
    // this is copying the bits straight, without doing any COW, since the array is basically restored, we just need
162
    // to copy the data straight back into it. This makes decompressing the array equivalent to copy on write for
163
    // normal arrays, in fact for a compressed array, we skip COW and we just decompress, getting the same result.
164
    auto setter = arr.m_vtable->setter;
103,140✔
165
    for (size_t ndx = 0; ndx < size; ++ndx)
15,126,924✔
166
        setter(arr, ndx, values[ndx]);
15,023,784✔
167

168
    // very important: since the ref of the current array has changed, the parent must be informed.
169
    // Otherwise we will lose the link between parent array and child array.
170
    arr.update_parent();
103,140✔
171
    REALM_ASSERT_DEBUG(width == arr.get_width());
103,140✔
172
    REALM_ASSERT_DEBUG(arr.size() == values.size());
103,140✔
173

174
    return true;
103,140✔
175
}
103,140✔
176

177
bool IntegerCompressor::init(const char* h)
178
{
926,670,129✔
179
    m_encoding = NodeHeader::get_encoding(h);
926,670,129✔
180
    // avoid to check wtype here, it is another access to the header, that we can avoid.
181
    // We just need to know if the encoding is packed or flex.
182
    // This makes Array::init_from_mem faster.
183
    if (REALM_LIKELY(!(is_packed() || is_flex())))
926,670,129✔
184
        return false;
867,388,245✔
185

186
    if (is_packed()) {
59,281,884✔
187
        init_packed(h);
43,603,611✔
188
    }
43,603,611✔
189
    else {
15,678,273✔
190
        init_flex(h);
15,678,273✔
191
    }
15,678,273✔
192
    return true;
59,281,884✔
193
}
926,670,129✔
194
int64_t IntegerCompressor::get_packed(const Array& arr, size_t ndx)
195
{
25,724,343✔
196
    return PackedCompressor::get(arr.m_integer_compressor, ndx);
25,724,343✔
197
}
25,724,343✔
198

199
int64_t IntegerCompressor::get_flex(const Array& arr, size_t ndx)
200
{
3,910,629✔
201
    return FlexCompressor::get(arr.m_integer_compressor, ndx);
3,910,629✔
202
}
3,910,629✔
203

204
std::vector<int64_t> IntegerCompressor::get_all_packed(const Array& arr, size_t begin, size_t end)
205
{
6✔
206
    return PackedCompressor::get_all(arr.m_integer_compressor, begin, end);
6✔
207
}
6✔
208

209
std::vector<int64_t> IntegerCompressor::get_all_flex(const Array& arr, size_t begin, size_t end)
NEW
210
{
×
NEW
211
    return FlexCompressor::get_all(arr.m_integer_compressor, begin, end);
×
NEW
212
}
×
213

214
void IntegerCompressor::get_chunk_packed(const Array& arr, size_t ndx, int64_t res[8])
NEW
215
{
×
NEW
216
    PackedCompressor::get_chunk(arr.m_integer_compressor, ndx, res);
×
NEW
217
}
×
218

219
void IntegerCompressor::get_chunk_flex(const Array& arr, size_t ndx, int64_t res[8])
NEW
220
{
×
NEW
221
    FlexCompressor::get_chunk(arr.m_integer_compressor, ndx, res);
×
NEW
222
}
×
223

224
template <class Cond>
225
bool IntegerCompressor::find_packed(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index,
226
                                    QueryStateBase* st)
227
{
5,337,693✔
228
    return PackedCompressor::find_all<Cond>(arr, val, begin, end, base_index, st);
5,337,693✔
229
}
5,337,693✔
230

231
template <class Cond>
232
bool IntegerCompressor::find_flex(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index,
233
                                  QueryStateBase* st)
234
{
912✔
235
    return FlexCompressor::find_all<Cond>(arr, val, begin, end, base_index, st);
912✔
236
}
912✔
237

238
void IntegerCompressor::set_vtable(Array& arr)
239
{
4,760,487✔
240
    static const Array::VTable vtable_packed = {get_packed,
4,760,487✔
241
                                                get_chunk_packed,
4,760,487✔
242
                                                get_all_packed,
4,760,487✔
243
                                                nullptr,
4,760,487✔
244
                                                {
4,760,487✔
245
                                                    find_packed<Equal>,
4,760,487✔
246
                                                    find_packed<NotEqual>,
4,760,487✔
247
                                                    find_packed<Greater>,
4,760,487✔
248
                                                    find_packed<Less>,
4,760,487✔
249
                                                }};
4,760,487✔
250
    static const Array::VTable vtable_flex = {get_flex,
4,760,487✔
251
                                              get_chunk_flex,
4,760,487✔
252
                                              get_all_flex,
4,760,487✔
253
                                              nullptr,
4,760,487✔
254
                                              {
4,760,487✔
255
                                                  find_flex<Equal>,
4,760,487✔
256
                                                  find_flex<NotEqual>,
4,760,487✔
257
                                                  find_flex<Greater>,
4,760,487✔
258
                                                  find_flex<Less>,
4,760,487✔
259
                                              }};
4,760,487✔
260
    if (is_packed()) {
5,412,393✔
261
        arr.m_vtable = &vtable_packed;
5,341,836✔
262
    }
5,341,836✔
263
    else {
2,147,554,204✔
264
        arr.m_vtable = &vtable_flex;
2,147,554,204✔
265
    }
2,147,554,204✔
266
}
4,760,487✔
267

268
int64_t IntegerCompressor::get(size_t ndx) const
269
{
38,917,485✔
270
    if (is_packed()) {
38,917,485✔
271
        return PackedCompressor::get(*this, ndx);
38,866,890✔
272
    }
38,866,890✔
273
    else {
50,595✔
274
        return FlexCompressor::get(*this, ndx);
50,595✔
275
    }
50,595✔
276
}
38,917,485✔
277

278
void IntegerCompressor::compress_values(const Array& arr, std::vector<int64_t>& values,
279
                                        std::vector<unsigned>& indices) const
280
{
558,942✔
281
    // The main idea is to compress the values in flex format. If Packed is better it will be chosen by
282
    // IntegerCompressor::compress. The algorithm is O(n lg n), it gives us nice properties, but we could use an
283
    // efficient hash table and try to boost perf during insertion, although leaf arrays are relatively small in
284
    // general (256 entries). The two compresion formats are packed and flex, and the data in the array is re-arranged
285
    // in the following ways (if compressed):
286
    //  Packed: || node header || ..... values ..... ||
287
    //  Flex:   || node header || ..... values ..... || ..... indices ..... ||
288

289
    const auto sz = arr.size();
558,942✔
290
    REALM_ASSERT_DEBUG(sz > 0);
558,942✔
291
    values.reserve(sz);
558,942✔
292
    indices.reserve(sz);
558,942✔
293

294
    for (size_t i = 0; i < sz; ++i) {
31,031,841✔
295
        auto item = arr.get(i);
30,472,899✔
296
        values.push_back(item);
30,472,899✔
297
    }
30,472,899✔
298

299
    std::sort(values.begin(), values.end());
558,942✔
300
    auto last = std::unique(values.begin(), values.end());
558,942✔
301
    values.erase(last, values.end());
558,942✔
302

303
    for (size_t i = 0; i < sz; ++i) {
31,031,868✔
304
        auto pos = std::lower_bound(values.begin(), values.end(), arr.get(i));
30,472,926✔
305
        indices.push_back(unsigned(std::distance(values.begin(), pos)));
30,472,926✔
306
        REALM_ASSERT_DEBUG(values[indices[i]] == arr.get(i));
30,472,926✔
307
    }
30,472,926✔
308
}
558,942✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc