• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / jorgen.edelbo_337

03 Jul 2024 01:04PM UTC coverage: 90.864% (-0.1%) from 90.984%
jorgen.edelbo_337

Pull #7826

Evergreen

nicola-cab
Merge branch 'master' of github.com:realm/realm-core into next-major
Pull Request #7826: Merge Next major

102968 of 181176 branches covered (56.83%)

3131 of 3738 new or added lines in 54 files covered. (83.76%)

106 existing lines in 23 files now uncovered.

217725 of 239616 relevant lines covered (90.86%)

6844960.2 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.5
/src/realm/integer_compressor.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2023 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <realm/integer_compressor.hpp>
20
#include <realm/array.hpp>
21
#include <realm/integer_flex_compressor.hpp>
22
#include <realm/integer_packed_compressor.hpp>
23
#include <realm/array_with_find.hpp>
24
#include <realm/query_conditions.hpp>
25

26
#include <vector>
27
#include <algorithm>
28

29
using namespace realm;
30

31
namespace {
32

33
template <typename T, typename... Arg>
34
inline void init_compress_array(Array& arr, size_t byte_size, Arg&&... args)
35
{
108,915✔
36
    Allocator& allocator = arr.get_alloc();
108,915✔
37
    auto mem = allocator.alloc(byte_size);
108,915✔
38
    auto h = mem.get_addr();
108,915✔
39
    T::init_header(h, std::forward<Arg>(args)...);
108,915✔
40
    NodeHeader::set_capacity_in_header(byte_size, h);
108,915✔
41
    arr.init_from_mem(mem);
108,915✔
42
}
108,915✔
43

44
} // namespace
45

46
bool IntegerCompressor::always_compress(const Array& origin, Array& arr, NodeHeader::Encoding encoding) const
NEW
47
{
×
NEW
48
    using Encoding = NodeHeader::Encoding;
×
NEW
49
    std::vector<int64_t> values;
×
NEW
50
    std::vector<unsigned> indices;
×
NEW
51
    compress_values(origin, values, indices);
×
NEW
52
    if (!values.empty()) {
×
NEW
53
        const uint8_t flags = NodeHeader::get_flags(origin.get_header());
×
NEW
54
        uint8_t v_width = std::max(Node::signed_to_num_bits(values.front()), Node::signed_to_num_bits(values.back()));
×
55

NEW
56
        if (encoding == Encoding::Packed) {
×
NEW
57
            const auto packed_size = NodeHeader::calc_size(indices.size(), v_width, NodeHeader::Encoding::Packed);
×
NEW
58
            init_compress_array<PackedCompressor>(arr, packed_size, flags, v_width, origin.size());
×
NEW
59
            PackedCompressor::copy_data(origin, arr);
×
NEW
60
        }
×
NEW
61
        else if (encoding == Encoding::Flex) {
×
NEW
62
            uint8_t ndx_width = NodeHeader::unsigned_to_num_bits(values.size());
×
NEW
63
            const auto flex_size = NodeHeader::calc_size(values.size(), indices.size(), v_width, ndx_width);
×
NEW
64
            init_compress_array<FlexCompressor>(arr, flex_size, flags, v_width, ndx_width, values.size(),
×
NEW
65
                                                indices.size());
×
NEW
66
            FlexCompressor::copy_data(arr, values, indices);
×
NEW
67
        }
×
NEW
68
        else {
×
69
            REALM_UNREACHABLE();
NEW
70
        }
×
NEW
71
        return true;
×
NEW
72
    }
×
NEW
73
    return false;
×
NEW
74
}
×
75

76
bool IntegerCompressor::compress(const Array& origin, Array& arr) const
77
{
857,016✔
78
    if (origin.m_width < 2 || origin.m_size == 0)
857,016✔
79
        return false;
308,817✔
80

81
#if REALM_COMPRESS
82
    return always_compress(origin, arr, NodeHeader::Encoding::Flex);
83
#else
84
    std::vector<int64_t> values;
548,199✔
85
    std::vector<unsigned> indices;
548,199✔
86
    compress_values(origin, values, indices);
548,199✔
87
    REALM_ASSERT(!values.empty());
548,199✔
88
    const auto uncompressed_size = origin.get_byte_size();
548,199✔
89
    uint8_t ndx_width = NodeHeader::unsigned_to_num_bits(values.size());
548,199✔
90
    uint8_t v_width = std::max(Node::signed_to_num_bits(values.front()), Node::signed_to_num_bits(values.back()));
548,199✔
91
    const auto packed_size = NodeHeader::calc_size(indices.size(), v_width, NodeHeader::Encoding::Packed);
548,199✔
92
    const auto flex_size = NodeHeader::calc_size(values.size(), indices.size(), v_width, ndx_width);
548,199✔
93
    // heuristic: only compress to packed if gain at least 11.1%
94
    const auto adjusted_packed_size = packed_size + packed_size / 8;
548,199✔
95
    // heuristic: only compress to flex if gain at least 20%
96
    const auto adjusted_flex_size = flex_size + flex_size / 4;
548,199✔
97
    if (adjusted_flex_size < adjusted_packed_size && adjusted_flex_size < uncompressed_size) {
548,199✔
98
        const uint8_t flags = NodeHeader::get_flags(origin.get_header());
29,919✔
99
        init_compress_array<FlexCompressor>(arr, flex_size, flags, v_width, ndx_width, values.size(), indices.size());
29,919✔
100
        FlexCompressor::copy_data(arr, values, indices);
29,919✔
101
        return true;
29,919✔
102
    }
29,919✔
103
    else if (adjusted_packed_size < uncompressed_size) {
518,280✔
104
        const uint8_t flags = NodeHeader::get_flags(origin.get_header());
78,996✔
105
        init_compress_array<PackedCompressor>(arr, packed_size, flags, v_width, origin.size());
78,996✔
106
        PackedCompressor::copy_data(origin, arr);
78,996✔
107
        return true;
78,996✔
108
    }
78,996✔
109
    return false;
439,284✔
110
#endif
548,199✔
111
}
548,199✔
112

113
bool IntegerCompressor::decompress(Array& arr) const
114
{
99,435✔
115
    int64_t min_v = std::numeric_limits<int64_t>::max();
99,435✔
116
    int64_t max_v = std::numeric_limits<int64_t>::min();
99,435✔
117
    REALM_ASSERT_DEBUG(arr.is_attached());
99,435✔
118
    auto values_fetcher = [&]() {
99,435✔
119
        const auto sz = arr.size();
99,429✔
120
        if (is_packed()) {
99,429✔
121
            std::vector<int64_t> res;
70,434✔
122
            res.reserve(sz);
70,434✔
123
            for (size_t i = 0; i < sz; ++i) {
11,369,307✔
124
                auto val = arr.get(i);
11,298,873✔
125
                if (val > max_v)
11,298,873✔
126
                    max_v = val;
5,553,702✔
127
                if (val < min_v)
11,298,873✔
128
                    min_v = val;
913,515✔
129
                res.push_back(val);
11,298,873✔
130
            }
11,298,873✔
131
            return res;
70,434✔
132
        }
70,434✔
133
        min_v = FlexCompressor::min(*this);
28,995✔
134
        max_v = FlexCompressor::max(*this);
28,995✔
135
        return FlexCompressor::get_all(*this, 0, sz);
28,995✔
136
    };
99,429✔
137
    const auto& values = values_fetcher();
99,435✔
138
    //  do the reverse of compressing the array
139
    REALM_ASSERT_DEBUG(!values.empty());
99,435✔
140
    using Encoding = NodeHeader::Encoding;
99,435✔
141
    const auto flags = NodeHeader::get_flags(arr.get_header());
99,435✔
142
    const auto size = values.size();
99,435✔
143
    const auto width = std::max(Array::bit_width(min_v), Array::bit_width(max_v));
99,435✔
144
    REALM_ASSERT_DEBUG(width == 0 || width == 1 || width == 2 || width == 4 || width == 8 || width == 16 ||
99,435✔
145
                       width == 32 || width == 64);
99,435✔
146
    // 64 is some slab allocator magic number.
147
    // The padding is needed in order to account for bit width expansion.
148
    const auto byte_size = 64 + NodeHeader::calc_size(size, width, Encoding::WTypBits);
99,435✔
149
    REALM_ASSERT_DEBUG(byte_size % 8 == 0); // nevertheless all the values my be aligned to 8
99,435✔
150

151
    // Create new array with the correct width
152
    const auto mem = arr.get_alloc().alloc(byte_size);
99,435✔
153
    const auto header = mem.get_addr();
99,435✔
154
    init_header(header, Encoding::WTypBits, flags, width, size);
99,435✔
155
    NodeHeader::set_capacity_in_header(byte_size, header);
99,435✔
156

157
    // Destroy old array before initializing
158
    arr.destroy();
99,435✔
159
    arr.init_from_mem(mem);
99,435✔
160

161
    // this is copying the bits straight, without doing any COW, since the array is basically restored, we just need
162
    // to copy the data straight back into it. This makes decompressing the array equivalent to copy on write for
163
    // normal arrays, in fact for a compressed array, we skip COW and we just decompress, getting the same result.
164
    auto setter = arr.m_vtable->setter;
99,435✔
165
    for (size_t ndx = 0; ndx < size; ++ndx)
15,142,995✔
166
        setter(arr, ndx, values[ndx]);
15,043,560✔
167

168
    // very important: since the ref of the current array has changed, the parent must be informed.
169
    // Otherwise we will lose the link between parent array and child array.
170
    arr.update_parent();
99,435✔
171
    REALM_ASSERT_DEBUG(width == arr.get_width());
99,435✔
172
    REALM_ASSERT_DEBUG(arr.size() == values.size());
99,435✔
173

174
    return true;
99,435✔
175
}
99,435✔
176

177
bool IntegerCompressor::init(const char* h)
178
{
917,758,176✔
179
    m_encoding = NodeHeader::get_encoding(h);
917,758,176✔
180
    // avoid to check wtype here, it is another access to the header, that we can avoid.
181
    // We just need to know if the encoding is packed or flex.
182
    // This makes Array::init_from_mem faster.
183
    if (REALM_LIKELY(!(is_packed() || is_flex())))
917,758,176✔
184
        return false;
859,231,731✔
185

186
    if (is_packed()) {
58,526,445✔
187
        init_packed(h);
43,756,965✔
188
    }
43,756,965✔
189
    else {
14,769,480✔
190
        init_flex(h);
14,769,480✔
191
    }
14,769,480✔
192
    return true;
58,526,445✔
193
}
917,758,176✔
194
int64_t IntegerCompressor::get_packed(const Array& arr, size_t ndx)
195
{
25,618,563✔
196
    return PackedCompressor::get(arr.m_integer_compressor, ndx);
25,618,563✔
197
}
25,618,563✔
198

199
int64_t IntegerCompressor::get_flex(const Array& arr, size_t ndx)
200
{
3,966,996✔
201
    return FlexCompressor::get(arr.m_integer_compressor, ndx);
3,966,996✔
202
}
3,966,996✔
203

204
std::vector<int64_t> IntegerCompressor::get_all_packed(const Array& arr, size_t begin, size_t end)
205
{
6✔
206
    return PackedCompressor::get_all(arr.m_integer_compressor, begin, end);
6✔
207
}
6✔
208

209
std::vector<int64_t> IntegerCompressor::get_all_flex(const Array& arr, size_t begin, size_t end)
NEW
210
{
×
NEW
211
    return FlexCompressor::get_all(arr.m_integer_compressor, begin, end);
×
NEW
212
}
×
213

214
void IntegerCompressor::get_chunk_packed(const Array& arr, size_t ndx, int64_t res[8])
NEW
215
{
×
NEW
216
    PackedCompressor::get_chunk(arr.m_integer_compressor, ndx, res);
×
NEW
217
}
×
218

219
void IntegerCompressor::get_chunk_flex(const Array& arr, size_t ndx, int64_t res[8])
NEW
220
{
×
NEW
221
    FlexCompressor::get_chunk(arr.m_integer_compressor, ndx, res);
×
NEW
222
}
×
223

224
template <class Cond>
225
bool IntegerCompressor::find_packed(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index,
226
                                    QueryStateBase* st)
227
{
4,969,530✔
228
    return PackedCompressor::find_all<Cond>(arr, val, begin, end, base_index, st);
4,969,530✔
229
}
4,969,530✔
230

231
template <class Cond>
232
bool IntegerCompressor::find_flex(const Array& arr, int64_t val, size_t begin, size_t end, size_t base_index,
233
                                  QueryStateBase* st)
234
{
912✔
235
    return FlexCompressor::find_all<Cond>(arr, val, begin, end, base_index, st);
912✔
236
}
912✔
237

238
void IntegerCompressor::set_vtable(Array& arr)
239
{
3,848,547✔
240
    static const Array::VTable vtable_packed = {get_packed,
3,848,547✔
241
                                                get_chunk_packed,
3,848,547✔
242
                                                get_all_packed,
3,848,547✔
243
                                                nullptr,
3,848,547✔
244
                                                {
3,848,547✔
245
                                                    find_packed<Equal>,
3,848,547✔
246
                                                    find_packed<NotEqual>,
3,848,547✔
247
                                                    find_packed<Greater>,
3,848,547✔
248
                                                    find_packed<Less>,
3,848,547✔
249
                                                }};
3,848,547✔
250
    static const Array::VTable vtable_flex = {get_flex,
3,848,547✔
251
                                              get_chunk_flex,
3,848,547✔
252
                                              get_all_flex,
3,848,547✔
253
                                              nullptr,
3,848,547✔
254
                                              {
3,848,547✔
255
                                                  find_flex<Equal>,
3,848,547✔
256
                                                  find_flex<NotEqual>,
3,848,547✔
257
                                                  find_flex<Greater>,
3,848,547✔
258
                                                  find_flex<Less>,
3,848,547✔
259
                                              }};
3,848,547✔
260
    if (is_packed()) {
5,034,168✔
261
        arr.m_vtable = &vtable_packed;
5,034,168✔
262
    }
5,034,168✔
263
    else {
4,294,967,294✔
264
        arr.m_vtable = &vtable_flex;
4,294,967,294✔
265
    }
4,294,967,294✔
266
}
3,848,547✔
267

268
int64_t IntegerCompressor::get(size_t ndx) const
269
{
38,773,227✔
270
    if (is_packed()) {
38,833,860✔
271
        return PackedCompressor::get(*this, ndx);
38,824,941✔
272
    }
38,824,941✔
273
    else {
2,147,492,566✔
274
        return FlexCompressor::get(*this, ndx);
2,147,492,566✔
275
    }
2,147,492,566✔
276
}
38,773,227✔
277

278
void IntegerCompressor::compress_values(const Array& arr, std::vector<int64_t>& values,
279
                                        std::vector<unsigned>& indices) const
280
{
548,202✔
281
    // The main idea is to compress the values in flex format. If Packed is better it will be chosen by
282
    // IntegerCompressor::compress. The algorithm is O(n lg n), it gives us nice properties, but we could use an
283
    // efficient hash table and try to boost perf during insertion, although leaf arrays are relatively small in
284
    // general (256 entries). The two compresion formats are packed and flex, and the data in the array is re-arranged
285
    // in the following ways (if compressed):
286
    //  Packed: || node header || ..... values ..... ||
287
    //  Flex:   || node header || ..... values ..... || ..... indices ..... ||
288

289
    const auto sz = arr.size();
548,202✔
290
    REALM_ASSERT_DEBUG(sz > 0);
548,202✔
291
    values.reserve(sz);
548,202✔
292
    indices.reserve(sz);
548,202✔
293

294
    for (size_t i = 0; i < sz; ++i) {
30,862,044✔
295
        auto item = arr.get(i);
30,313,842✔
296
        values.push_back(item);
30,313,842✔
297
    }
30,313,842✔
298

299
    std::sort(values.begin(), values.end());
548,202✔
300
    auto last = std::unique(values.begin(), values.end());
548,202✔
301
    values.erase(last, values.end());
548,202✔
302

303
    for (size_t i = 0; i < sz; ++i) {
30,859,464✔
304
        auto pos = std::lower_bound(values.begin(), values.end(), arr.get(i));
30,311,262✔
305
        indices.push_back(unsigned(std::distance(values.begin(), pos)));
30,311,262✔
306
        REALM_ASSERT_DEBUG(values[indices[i]] == arr.get(i));
30,311,262✔
307
    }
30,311,262✔
308
}
548,202✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc