
realm / realm-core / build 1664 (push, Evergreen, GitHub)

13 Sep 2023 01:20PM UTC coverage: 91.218% (-0.003%) from 91.221%

More detailed logging (#6971)

95892 of 175862 branches covered (54.53%)

55 of 71 new or added lines in 6 files covered (77.46%)

93 existing lines in 14 files now uncovered.

233636 of 256129 relevant lines covered (91.22%)

7415075.7 hits per line

Source file: /src/realm/group_writer.cpp (95.61% of lines covered)
/*************************************************************************
 *
 * Copyright 2016 Realm Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **************************************************************************/

#include <algorithm>
#include <set>

#include <iostream>

#include <realm/group_writer.hpp>

#include <realm/alloc_slab.hpp>
#include <realm/transaction.hpp>
#include <realm/disable_sync_to_disk.hpp>
#include <realm/impl/destroy_guard.hpp>
#include <realm/impl/simulated_failure.hpp>
#include <realm/metrics/metric_timer.hpp>
#include <realm/util/miscellaneous.hpp>
#include <realm/util/safe_int_ops.hpp>

using namespace realm;
using namespace realm::util;
using namespace realm::metrics;

namespace realm {
class InMemoryWriter : public _impl::ArrayWriterBase {
public:
    InMemoryWriter(GroupWriter& owner)
        : m_owner(owner)
        , m_alloc(owner.m_alloc)
    {
    }
    ref_type write_array(const char* data, size_t size, uint32_t checksum) override
    {
        size_t pos = m_owner.get_free_space(size);

        // Write the block
        char* dest_addr = translate(pos);
        REALM_ASSERT_RELEASE(dest_addr && (reinterpret_cast<size_t>(dest_addr) & 7) == 0);
        memcpy(dest_addr, &checksum, 4);
        memcpy(dest_addr + 4, data + 4, size - 4);
        // return ref of the written array
        ref_type ref = to_ref(pos);
        return ref;
    }
    char* translate(ref_type ref)
    {
        return m_alloc.translate_memory_pos(ref);
    }

private:
    GroupWriter& m_owner;
    SlabAlloc& m_alloc;
};
} // namespace realm

// Class controlling a memory mapped window into a file
class GroupWriter::MapWindow {
public:
    MapWindow(size_t alignment, util::File& f, ref_type start_ref, size_t initial_size,
              util::WriteMarker* write_marker = nullptr);
    ~MapWindow();

    // translate a ref to a pointer
    // inside the window defined during construction.
    char* translate(ref_type ref);
    void encryption_read_barrier(void* start_addr, size_t size);
    void encryption_write_barrier(void* start_addr, size_t size);
    // flush from private to shared cache
    void flush();
    // sync to disk (including flush as needed)
    void sync();
    // return true if the specified range is fully visible through
    // the MapWindow
    bool matches(ref_type start_ref, size_t size);
    // return false if the mapping cannot be extended to hold the
    // requested size - extends if possible and then returns true
    bool extends_to_match(util::File& f, ref_type start_ref, size_t size);

private:
    util::File::Map<char> m_map;
    ref_type m_base_ref;
    ref_type aligned_to_mmap_block(ref_type start_ref);
    size_t get_window_size(util::File& f, ref_type start_ref, size_t size);
    size_t m_alignment;
};

// True if a requested block falls within a memory mapping.
bool GroupWriter::MapWindow::matches(ref_type start_ref, size_t size)
{
    if (start_ref < m_base_ref)
        return false;
    if (start_ref + size > m_base_ref + m_map.get_size())
        return false;
    return true;
}

// When determining which part of the file to mmap, we try to pick a 1MB window containing
// the requested block. We align windows on 1MB boundaries. We also align window size at
// 1MB, except in cases where the referenced part of the file straddles a 1MB boundary.
// In that case we choose a larger window.
//
// In cases where a 1MB window would stretch beyond the end of the file, we choose
// a smaller window. Anything mapped after the end of file would be undefined anyways.
ref_type GroupWriter::MapWindow::aligned_to_mmap_block(ref_type start_ref)
{
    // align to 1MB boundary
    size_t page_mask = m_alignment - 1;
    return start_ref & ~page_mask;
}

size_t GroupWriter::MapWindow::get_window_size(util::File& f, ref_type start_ref, size_t size)
{
    size_t window_size = start_ref + size - m_base_ref;
    // always map at least to match alignment
    if (window_size < m_alignment)
        window_size = m_alignment;
    // but never map beyond end of file
    size_t file_size = to_size_t(f.get_size());
    REALM_ASSERT_DEBUG_EX(start_ref + size <= file_size, start_ref + size, file_size);
    if (window_size > file_size - m_base_ref)
        window_size = file_size - m_base_ref;
    return window_size;
}
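// Illustrative note (not part of the original source): a worked example of the window
// placement math implemented above, assuming m_alignment is 1 MiB (0x100000).
// For start_ref = 0x2C0048 and size = 0x40:
//   aligned_to_mmap_block(0x2C0048) = 0x2C0048 & ~0xFFFFF = 0x200000   (becomes m_base_ref)
//   window_size = 0x2C0048 + 0x40 - 0x200000 = 0xC0088, which is less than 1 MiB,
//   so it is rounded up to 0x100000 and finally clamped to file_size - m_base_ref,
//   so the mapping never extends past the end of the file.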

// The file may grow in increments much smaller than 1MB. This can lead to a stream of requests
// which are each just beyond the end of the last mapping we made. It is important to extend the
// existing window to cover the new request (if possible) as opposed to adding a new window.
// The reason is not obvious: open windows need to be sync'ed to disk at the end of the commit,
// and we really want to use as few calls to msync() as possible.
//
// extends_to_match() will extend an existing mapping to accommodate a new request if possible
// and return true. If the request falls in a different 1MB window, it'll return false.
bool GroupWriter::MapWindow::extends_to_match(util::File& f, ref_type start_ref, size_t size)
{
    size_t aligned_ref = aligned_to_mmap_block(start_ref);
    if (aligned_ref != m_base_ref)
        return false;
    size_t window_size = get_window_size(f, start_ref, size);
    m_map.sync();
    m_map.unmap();
    m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref);
    return true;
}

GroupWriter::MapWindow::MapWindow(size_t alignment, util::File& f, ref_type start_ref, size_t size,
                                  util::WriteMarker* write_marker)
    : m_alignment(alignment)
{
    m_base_ref = aligned_to_mmap_block(start_ref);
    size_t window_size = get_window_size(f, start_ref, size);
    m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref);
#if REALM_ENABLE_ENCRYPTION
    if (auto p = m_map.get_encrypted_mapping())
        p->set_marker(write_marker);
#else
    static_cast<void>(write_marker);
#endif
}

GroupWriter::MapWindow::~MapWindow()
{
    m_map.sync();
    m_map.unmap();
}

void GroupWriter::MapWindow::flush()
{
    m_map.flush();
}

void GroupWriter::MapWindow::sync()
{
    flush();
    m_map.sync();
}

char* GroupWriter::MapWindow::translate(ref_type ref)
{
    return m_map.get_addr() + (ref - m_base_ref);
}

void GroupWriter::MapWindow::encryption_read_barrier(void* start_addr, size_t size)
{
    realm::util::encryption_read_barrier_for_write(start_addr, size, m_map.get_encrypted_mapping());
}

void GroupWriter::MapWindow::encryption_write_barrier(void* start_addr, size_t size)
{
    realm::util::encryption_write_barrier(start_addr, size, m_map.get_encrypted_mapping());
}

GroupWriter::GroupWriter(Transaction& group, Durability dura, WriteMarker* write_marker)
    : m_group(group)
    , m_alloc(group.m_alloc)
    , m_free_positions(m_alloc)
    , m_free_lengths(m_alloc)
    , m_free_versions(m_alloc)
    , m_durability(dura)
    , m_write_marker(write_marker)
{
    m_map_windows.reserve(num_map_windows);
#if REALM_PLATFORM_APPLE && REALM_MOBILE
    m_window_alignment = 1 * 1024 * 1024; // 1M
#else
    if (sizeof(int*) == 4) {                  // 32 bit address space
        m_window_alignment = 1 * 1024 * 1024; // 1M
    }
    else {
        // large address space - just choose a size so that we have a single window
        size_t total_size = m_alloc.get_total_size();
        size_t wanted_size = 1;
        while (total_size) {
            total_size >>= 1;
            wanted_size <<= 1;
        }
        if (wanted_size < 1 * 1024 * 1024)
            wanted_size = 1 * 1024 * 1024; // minimum 1M
        m_window_alignment = wanted_size;
    }
#endif
    Array& top = m_group.m_top;
    m_logical_size = size_t(top.get_as_ref_or_tagged(Group::s_file_size_ndx).get_as_int());

    // When we make a commit, we will at least need room for the version
    while (top.size() <= Group::s_version_ndx) {
        top.add(0); // Throws
    }

    m_free_positions.set_parent(&top, Group::s_free_pos_ndx);
    m_free_lengths.set_parent(&top, Group::s_free_size_ndx);
    m_free_versions.set_parent(&top, Group::s_free_version_ndx);

    ref_type free_positions_ref = m_free_positions.get_ref_from_parent();
    if (free_positions_ref) {
        m_free_positions.init_from_ref(free_positions_ref);
    }
    else {
        m_free_positions.create(Array::type_Normal); // Throws
        _impl::DestroyGuard<Array> dg(&m_free_positions);
        m_free_positions.update_parent(); // Throws
        dg.release();
    }

    if (ref_type ref = m_free_lengths.get_ref_from_parent()) {
        m_free_lengths.init_from_ref(ref);
        REALM_ASSERT_RELEASE_EX(m_free_positions.size() == m_free_lengths.size(), top.get_ref(),
                                m_free_positions.size(), m_free_lengths.size());
    }
    else {
        m_free_lengths.create(Array::type_Normal); // Throws
        _impl::DestroyGuard<Array> dg(&m_free_lengths);
        m_free_lengths.update_parent(); // Throws
        dg.release();
    }

    DB::version_type initial_version = 0;

    if (ref_type ref = m_free_versions.get_ref_from_parent()) {
        m_free_versions.init_from_ref(ref);
        REALM_ASSERT_RELEASE_EX(m_free_versions.size() == m_free_lengths.size(), top.get_ref(),
                                m_free_versions.size(), m_free_lengths.size());
    }
    else {
        int_fast64_t value = int_fast64_t(initial_version);
        top.set(6, 1 + 2 * uint64_t(initial_version)); // Throws
        size_t n = m_free_positions.size();
        bool context_flag = false;
        m_free_versions.create(Array::type_Normal, context_flag, n, value); // Throws
        _impl::DestroyGuard<Array> dg(&m_free_versions);
        m_free_versions.update_parent(); // Throws
        dg.release();
    }
    m_evacuation_limit = 0;
    m_backoff = 0;
    if (top.size() > Group::s_evacuation_point_ndx) {
        if (auto val = top.get(Group::s_evacuation_point_ndx)) {
            Array arr(m_alloc);
            if (val & 1) {
                m_evacuation_limit = size_t(val >> 1);
                arr.create(Node::type_Normal);
                arr.add(uint64_t(m_evacuation_limit));
                arr.add(0); // Backoff = false
                top.set_as_ref(Group::s_evacuation_point_ndx, arr.get_ref());
            }
            else {
                arr.init_from_ref(to_ref(val));
                auto sz = arr.size();
                REALM_ASSERT(sz >= 2);
                m_evacuation_limit = size_t(arr.get(0));
                m_backoff = arr.get(1);
                if (m_backoff > 0) {
                    --m_backoff;
                }
                else {
                    for (size_t i = 2; i < sz; i++) {
                        m_evacuation_progress.push_back(size_t(arr.get(i)));
                    }
                }
                // We give up if the freelists were allocated above the evacuation limit
                if (m_evacuation_limit > 0 && free_positions_ref > m_evacuation_limit) {
                    // Wait 10 commits until trying again
                    m_backoff = 10;
                    m_evacuation_limit = 0;
                    if (auto logger = m_group.get_logger()) {
                        logger->log(util::Logger::Level::detail, "Give up compaction");
                    }
                }
            }
        }
    }
}

GroupWriter::~GroupWriter() = default;

size_t GroupWriter::get_file_size() const noexcept
{
    auto sz = to_size_t(m_alloc.get_file_size());
    return sz;
}

void GroupWriter::flush_all_mappings()
{
    for (const auto& window : m_map_windows) {
        window->flush();
    }
}

void GroupWriter::sync_all_mappings()
{
    if (m_durability == Durability::Unsafe)
        return;
    for (const auto& window : m_map_windows) {
        window->sync();
    }
}

// Get a window matching a request, either creating a new window or reusing an
// existing one (possibly extended to accommodate the new request). Maintain a
// cache of open windows which are sync'ed and closed following a least recently
// used policy. Entries in the cache are kept in MRU order.
GroupWriter::MapWindow* GroupWriter::get_window(ref_type start_ref, size_t size)
{
    auto match = std::find_if(m_map_windows.begin(), m_map_windows.end(), [&](const auto& window) {
        return window->matches(start_ref, size) || window->extends_to_match(m_alloc.get_file(), start_ref, size);
    });
    if (match != m_map_windows.end()) {
        // move matching window to top (to keep LRU order)
        std::rotate(m_map_windows.begin(), match, match + 1);
        return m_map_windows[0].get();
    }
    // no window found, make room for a new one at the top
    if (m_map_windows.size() == num_map_windows) {
        m_map_windows.back()->flush();
        m_map_windows.pop_back();
    }
    auto new_window =
        std::make_unique<MapWindow>(m_window_alignment, m_alloc.get_file(), start_ref, size, m_write_marker);
    m_map_windows.insert(m_map_windows.begin(), std::move(new_window));
    return m_map_windows[0].get();
}
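// Illustrative note (not part of the original source): with the cache in MRU order,
// say m_map_windows holds [A, B, C, D] and C matches the request. The call
// std::rotate(m_map_windows.begin(), match, match + 1) rotates the sub-range [A, B, C]
// so that C becomes the first element, giving [C, A, B, D]. The relative order of the
// untouched windows is preserved, so the least recently used window stays at the back
// and is the one flushed and dropped when the cache reaches num_map_windows entries.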

#define REALM_ALLOC_DEBUG 0
#if REALM_ALLOC_DEBUG
#define ALLOC_DBG_COUT(args)                                                                                         \
    {                                                                                                                \
        std::cout << args;                                                                                           \
    }
#else
#define ALLOC_DBG_COUT(args)
#endif

#ifdef REALM_DEBUG
void GroupWriter::map_reachable()
{
    class Collector : public Array::MemUsageHandler {
    public:
        Collector(std::vector<Reachable>& reachable)
            : m_reachable(reachable)
        {
        }
        void handle(ref_type ref, size_t, size_t used) override
        {
            m_reachable.emplace_back(Reachable{ref, used});
        }
        std::vector<Reachable>& m_reachable;
    };
    // collect reachable blocks in all reachable versions
    for (auto& [version, info] : m_top_ref_map) {
        Collector collector(info.reachable_blocks);
        // skip any empty entries
        if (info.top_ref == 0)
            continue;
        Array array(m_alloc);
        array.init_from_ref(info.top_ref);
        array.report_memory_usage(collector);
        std::sort(info.reachable_blocks.begin(), info.reachable_blocks.end(),
                  [](const Reachable& a, const Reachable& b) {
                      return a.pos < b.pos;
                  });
    }

#if REALM_ALLOC_DEBUG
    std::cout << "  Reachable: ";
    // this really should be inverted, showing all versions per entry instead of all entries per version
    for (auto& [version, info] : m_top_ref_map) {
        std::cout << std::endl << "    Version: " << version;
        for (auto& i : info.reachable_blocks) {
            std::cout << std::endl << "      " << i.pos << " - " << i.pos + i.size;
        }
    }
    std::cout << std::endl << "  Backdating:";
#endif
}
#endif

void GroupWriter::backdate()
{
    struct FreeList {
        Array positions;
        Array lengths;
        Array versions;
        ref_type top_ref;
        ref_type logical_file_size;
        uint64_t version;
        bool initialized = false;
        FreeList(Allocator& alloc, ref_type top, ref_type logical_file_size, uint64_t version)
            : positions(alloc)
            , lengths(alloc)
            , versions(alloc)
            , top_ref(top)
            , logical_file_size(logical_file_size)
            , version(version)
        {
        }
    };


    using FreeListMap = std::vector<std::unique_ptr<FreeList>>;
    FreeListMap old_freelists;
    old_freelists.reserve(m_top_ref_map.size());
    for (auto& [version, info] : m_top_ref_map) {
        if (version < m_oldest_reachable_version)
            continue;
        auto e = std::make_unique<FreeList>(m_alloc, info.top_ref, info.logical_file_size, version);
        old_freelists.push_back(std::move(e));
    }


    // little helper: get the youngest version older than given
    auto get_earlier = [&](uint64_t version) -> FreeList* {
        auto it = std::lower_bound(old_freelists.begin(), old_freelists.end(), version,
                                   [](const std::unique_ptr<FreeList>& e, uint64_t v) {
                                       return e->version < v;
                                   });
        // There will always be at least one freelist:
        REALM_ASSERT(it != old_freelists.end());
        REALM_ASSERT(it != old_freelists.begin());
        --it;
        REALM_ASSERT((*it)->version < version);
        return it->get();
    };


    // find (if possible) youngest time stamp in any block in a sequence that fully covers a given one.
    auto find_cover_for = [&](const FreeSpaceEntry& entry, FreeList& free_list) -> std::optional<uint64_t> {
        auto entry_end = std::min(entry.ref + entry.size, free_list.logical_file_size);
        if (entry.ref >= entry_end) {
            return 0; // block completely beyond end of file
        }

        if (!free_list.initialized) {
            // setup arrays
            free_list.initialized = true;
            if (free_list.top_ref) {
                Array top_array(m_alloc);
                top_array.init_from_ref(free_list.top_ref);
                if (top_array.size() > Group::s_free_version_ndx) {
                    // we have a freelist with versioning info
                    free_list.positions.init_from_ref(top_array.get_as_ref(Group::s_free_pos_ndx));
                    free_list.lengths.init_from_ref(top_array.get_as_ref(Group::s_free_size_ndx));
                    free_list.versions.init_from_ref(top_array.get_as_ref(Group::s_free_version_ndx));
                }
            }
        }

        if (!free_list.positions.is_attached()) {
            return {}; // no free list associated with that version
        }
        const size_t limit = free_list.positions.size();
        if (limit == 0) {
            return {}; // empty freelist
        }
        const size_t index = free_list.positions.upper_bound_int(entry.ref) - 1;
        if (index == size_t(-1)) {
            return {}; // no free blocks before the 'ref' we are looking for
        }
        REALM_ASSERT(index < limit); // follows from above
        const auto start_pos = static_cast<ref_type>(free_list.positions.get(index));
        REALM_ASSERT(start_pos <= entry.ref);
        auto end_pos = start_pos + static_cast<ref_type>(free_list.lengths.get(index));
        if (end_pos <= entry.ref) {
            return {}; // free block ends before the 'ref' we are looking for
        }
        uint64_t found_version = free_list.versions.get(index);

        // coalesce any subsequent contiguous entries
        for (auto next = index + 1;
             next < limit && free_list.positions.get(next) == (int64_t)end_pos && end_pos < entry_end; ++next) {
            end_pos += static_cast<ref_type>(free_list.lengths.get(next));
            // pick youngest (highest) version of blocks
            found_version = std::max<uint64_t>(found_version, free_list.versions.get(next));
        }
        // is the block fully covered by range established above?
        if (end_pos < entry_end) {
            return {}; // no, it isn't
        }
        REALM_ASSERT(found_version <= entry.released_at_version);
        return found_version;
    };

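    // Illustrative note (not part of the original source): an example of how the helpers
    // above combine in backdate_single_entry() below. Suppose a block [ref = 4096, size = 64]
    // carries released_at_version = 12, and the youngest older reachable free-list (say
    // version 9) already contains free chunks that together cover [4096, 4160). Then
    // find_cover_for() returns the youngest version stamped on those chunks (at most 9),
    // the entry's released_at_version is lowered to that value, and the block can be
    // recycled as soon as the oldest version still in use reaches it, instead of waiting
    // for version 12 to become unreachable.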
    // check if a given entry overlaps a reachable block. Only used in debug mode.
    auto is_referenced = [&](FreeSpaceEntry& entry) -> bool {
#ifdef REALM_DEBUG
        bool referenced = false;
        ALLOC_DBG_COUT("    Considering [" << entry.ref << ", " << entry.size << "]-" << entry.released_at_version
                                           << " {");
        auto end = m_top_ref_map.end();
        for (auto top_ref_map = m_top_ref_map.begin(); top_ref_map != end && !referenced; ++top_ref_map) {
            auto info_begin = top_ref_map->second.reachable_blocks.begin();
            auto info_end = top_ref_map->second.reachable_blocks.end();
            auto it = std::lower_bound(info_begin, info_end, entry.ref, [](const Reachable& a, size_t val) {
                return val > a.pos;
            });
            if (it != info_end) {
                if (it != info_begin)
                    --it;
                while (it != info_end && it->pos < entry.ref + entry.size) {
                    if (it->pos + it->size > entry.ref) {
                        ALLOC_DBG_COUT(top_ref_map->first << " ");
                        referenced = true;
                        break;
                    }
                    ++it;
                }
            }
        }
        if (!referenced) {
            ALLOC_DBG_COUT("none");
        }
        ALLOC_DBG_COUT("} ");
        return referenced;
#else
        static_cast<void>(entry); // silence a warning
        return false;
#endif
    };

    auto backdate_single_entry = [&](FreeSpaceEntry& entry) -> void {
        const auto referenced = is_referenced(entry);
        // early out if the reference is to the most recent version
        if (entry.released_at_version == m_current_version) {
            REALM_ASSERT_DEBUG(!referenced);
            return;
        }
        while (entry.released_at_version) {
            // early out for references before oldest freelist:
            if (entry.released_at_version <= this->m_oldest_reachable_version) {
                REALM_ASSERT_DEBUG(!referenced);
                break;
            }
            auto earlier_it = get_earlier(entry.released_at_version);
            ALLOC_DBG_COUT(" - earlier freelist: " << earlier_it->version);
            if (auto covering_version = find_cover_for(entry, *earlier_it)) {
                ALLOC_DBG_COUT("  backdating [" << entry.ref << ", " << entry.size << "]  version: "
                                                << entry.released_at_version << " -> " << *covering_version);
                REALM_ASSERT_DEBUG(!referenced);
                entry.released_at_version = *covering_version;
            }
            else {
                ALLOC_DBG_COUT("  not free at that point");
                REALM_ASSERT_DEBUG(referenced);
                break;
            }
        }
        ALLOC_DBG_COUT(std::endl);
    };


#ifdef REALM_DEBUG
    map_reachable();
#endif
    for (auto&& entry : m_not_free_in_file) {
        backdate_single_entry(entry);
    }
}

ref_type GroupWriter::write_group()
{
#if REALM_METRICS
    std::unique_ptr<MetricTimer> fsync_timer = Metrics::report_write_time(m_group);
#endif // REALM_METRICS

    ALLOC_DBG_COUT("Commit nr " << m_current_version << "   ( from " << m_oldest_reachable_version << " )"
                                << std::endl);

    read_in_freelist();
    // Now, 'm_size_map' holds all free elements candidate for recycling

    Array& top = m_group.m_top;
    ALLOC_DBG_COUT("  Allocating file space for data:" << std::endl);

    // Recursively write all changed arrays (but not 'top' and free-lists yet,
    // as they are going to change along the way.) If free space is available in
    // the attached database file, we use it, but this does not include space
    // that has been released during the current transaction (or since the last
    // commit), as that would lead to clobbering of the previous database
    // version.
    bool deep = true, only_if_modified = true;
    std::unique_ptr<InMemoryWriter> in_memory_writer;
    _impl::ArrayWriterBase* writer = this;
    if (m_alloc.is_in_memory()) {
        in_memory_writer = std::make_unique<InMemoryWriter>(*this);
        writer = in_memory_writer.get();
    }
    ref_type names_ref = m_group.m_table_names.write(*writer, deep, only_if_modified); // Throws
    ref_type tables_ref = m_group.m_tables.write(*writer, deep, only_if_modified);     // Throws

    int_fast64_t value_1 = from_ref(names_ref);
    int_fast64_t value_2 = from_ref(tables_ref);
    top.set(0, value_1); // Throws
    top.set(1, value_2); // Throws

    // If file has a history and is opened in shared mode, write the new history
    // to the file. If the file has a history, but is not opened in shared mode,
    // discard the history, as it could otherwise be left in an inconsistent state.
    if (top.size() > Group::s_hist_ref_ndx) {
        if (ref_type history_ref = top.get_as_ref(Group::s_hist_ref_ndx)) {
            Allocator& alloc = top.get_alloc();
            ref_type new_history_ref = Array::write(history_ref, alloc, *writer, only_if_modified); // Throws
            top.set(Group::s_hist_ref_ndx, from_ref(new_history_ref));                              // Throws
        }
    }
    if (top.size() > Group::s_evacuation_point_ndx) {
        ref_type ref = top.get_as_ref(Group::s_evacuation_point_ndx);
        if (m_evacuation_limit || m_backoff) {
            REALM_ASSERT(ref);
            Array arr(m_alloc);
            arr.init_from_ref(ref);
            arr.truncate(2);

            arr.set(0, int64_t(m_evacuation_limit));
            if (m_backoff == 0 && m_evacuation_progress.empty()) {
                // We have done a scan - Now we should just wait for the nodes still
                // being in the evacuation zone being released by the transactions
                // still holding on to them. This could take many commits.
                m_backoff = 1000;
            }
            arr.set(1, m_backoff); // Backoff from scanning
            for (auto index : m_evacuation_progress) {
                arr.add(int64_t(index));
            }
            ref = arr.write(*writer, false, only_if_modified);
            top.set_as_ref(Group::s_evacuation_point_ndx, ref);
        }
        else if (ref) {
            Array::destroy(ref, m_alloc);
            top.set(Group::s_evacuation_point_ndx, 0);
        }
    }

    ALLOC_DBG_COUT("  Freelist size after allocations: " << m_size_map.size() << std::endl);
    // We now back-date (if possible) any blocks freed in versions which
    // are becoming unreachable.
    if (m_any_new_unreachables)
        backdate();

    // We now have a bit of a chicken-and-egg problem. We need to write the
    // free-lists to the file, but the act of writing them will consume free
    // space, and thereby change the free-lists. To solve this problem, we
    // calculate an upper bound on the amount of space required for all of the
    // remaining arrays and allocate the space as one big chunk. This way we can
    // finalize the free-lists before writing them to the file.
    size_t max_free_list_size = m_size_map.size();

    // We need to add to the free-list any space that was freed during the
    // current transaction, but to avoid clobbering the previous version, we
    // cannot add it yet. Instead we simply account for the space
    // required. Since we will modify the free-lists themselves, we must ensure
    // that the original arrays used by the free-lists are counted as part of
    // the space that was freed during the current transaction. Note that a
    // copy-on-write on m_free_positions, for example, also implies a
    // copy-on-write on Group::m_top.
    ALLOC_DBG_COUT("  In-mem freelist before/after consolidation: " << m_group.m_alloc.m_free_read_only.size());
    size_t free_read_only_size = m_group.m_alloc.consolidate_free_read_only(); // Throws
    ALLOC_DBG_COUT("/" << free_read_only_size << std::endl);
    max_free_list_size += free_read_only_size;
    max_free_list_size += m_not_free_in_file.size();
    max_free_list_size += m_under_evacuation.size();
    // The final allocation of free space (i.e., the call to
    // reserve_free_space() below) may add extra entries to the free-lists.
    // We reserve room for the worst case scenario, which is as follows:
    // If the database has *max* theoretical fragmentation, it'll need one
    // entry in the free list for every 16 bytes, because both allocated and
    // free chunks are at least 8 bytes in size. For databases smaller than 2Gb
    // each free list entry requires 16 bytes (4 for the position, 4 for the
    // size and 8 for the version). The worst case scenario thus needs access
    // to a contiguous address range equal to existing database size.
    // This growth requires at the most 8 extension steps, each adding one entry
    // to the free list. The worst case occurs when you will have to expand the
    // size to over 2 GB where each entry suddenly requires 24 bytes. In this
    // case you will need 2 extra steps.
    // Another limit is due to the fact that an array holds less than 0x1000000
    // entries, so the total free list size will be less than 0x16000000. So for
    // bigger databases the space required for free lists will be relatively less.
    max_free_list_size += 10;

    size_t max_free_space_needed =
        Array::get_max_byte_size(top.size()) + size_per_free_list_entry() * max_free_list_size;

    ALLOC_DBG_COUT("  Allocating file space for freelists:" << std::endl);
1,361,505✔
738
    // Reserve space for remaining arrays. We ask for some extra bytes beyond the
683,304✔
739
    // maximum number that is required. This ensures that even if we end up
683,304✔
740
    // using the maximum size possible, we still do not end up with a zero size
683,304✔
741
    // free-space chunk as we deduct the actually used size from it.
683,304✔
742
    auto reserve = reserve_free_space(max_free_space_needed + 8); // Throws
1,361,505✔
743
    size_t reserve_pos = reserve->second;
1,361,505✔
744
    size_t reserve_size = reserve->first;
1,361,505✔
745

683,304✔
746
    // Now we can check, if we can reduce the logical file size. This can be done
683,304✔
747
    // when there is only one block in m_under_evacuation, which means that all
683,304✔
748
    // nodes in this range have been moved
683,304✔
749
    if (m_under_evacuation.size() == 1) {
1,361,505✔
750
        auto& elem = m_under_evacuation.back();
18✔
751
        if (elem.ref + elem.size == m_logical_size) {
18✔
752
            // This is at the end of the file
9✔
753
            size_t pos = elem.ref;
18✔
754
            m_logical_size = util::round_up_to_page_size(pos);
18✔
755
            elem.size = (m_logical_size - pos);
18✔
756
            if (elem.size == 0)
18✔
757
                m_under_evacuation.clear();
18✔
758
            top.set(Group::s_file_size_ndx, RefOrTagged::make_tagged(m_logical_size));
18✔
759
            auto ref = top.get_as_ref(Group::s_evacuation_point_ndx);
18✔
760
            REALM_ASSERT(ref);
18✔
761
            Array::destroy(ref, m_alloc);
18✔
762
            top.set(Group::s_evacuation_point_ndx, 0);
18✔
763
            m_evacuation_limit = 0;
18✔
764

9✔
765
            if (auto logger = m_group.get_logger()) {
18✔
NEW
766
                logger->log(util::Logger::Level::detail, "New logical size %1", m_logical_size);
×
NEW
767
            }
×
768
        }
18✔
769
    }
18✔
770

683,304✔
771
    // At this point we have allocated all the space we need, so we can add to
683,304✔
772
    // the free-lists any free space created during the current transaction (or
683,304✔
773
    // since last commit). Had we added it earlier, we would have risked
683,304✔
774
    // clobbering the previous database version. Note, however, that this risk
683,304✔
775
    // would only have been present in the non-transactional case where there is
683,304✔
776
    // no version tracking on the free-space chunks.
683,304✔
777

683,304✔
778
    // Now, let's update the realm-style freelists, which will later be written to file.
683,304✔
779
    // Function returns index of element holding the space reserved for the free
683,304✔
780
    // lists in the file.
683,304✔
781
    size_t reserve_ndx = recreate_freelist(reserve_pos);
1,361,505✔
782

683,304✔
783
    ALLOC_DBG_COUT("  Freelist size after merge: " << m_free_positions.size() << "   freelist space required: "
1,361,505✔
784
                                                   << max_free_space_needed << std::endl);
1,361,505✔
785
    // Before we calculate the actual sizes of the free-list arrays, we must
683,304✔
786
    // make sure that the final adjustments of the free lists (i.e., the
683,304✔
787
    // deduction of the actually used space from the reserved chunk,) will not
683,304✔
788
    // change the byte-size of those arrays.
683,304✔
789
    // size_t reserve_pos = to_size_t(m_free_positions.get(reserve_ndx));
683,304✔
790
    REALM_ASSERT_3(reserve_size, >, max_free_space_needed);
1,361,505✔
791
    int_fast64_t value_4 = to_int64(reserve_pos + max_free_space_needed);
1,361,505✔
792

683,304✔
793
#if REALM_ENABLE_MEMDEBUG
794
    m_free_positions.m_no_relocation = true;
795
    m_free_lengths.m_no_relocation = true;
796
#endif
797

683,304✔
798
    // Ensure that this arrays does not reposition itself
683,304✔
799
    m_free_positions.ensure_minimum_width(value_4); // Throws
1,361,505✔
800

683,304✔
801
    // Get final sizes of free-list arrays
683,304✔
802
    size_t free_positions_size = m_free_positions.get_byte_size();
1,361,505✔
803
    size_t free_sizes_size = m_free_lengths.get_byte_size();
1,361,505✔
804
    size_t free_versions_size = m_free_versions.get_byte_size();
1,361,505✔
805
    REALM_ASSERT(Array::get_wtype_from_header(Array::get_header_from_data(m_free_versions.m_data)) ==
1,361,505✔
806
                 Array::wtype_Bits);
1,361,505✔
807

683,304✔
808
    // Calculate write positions
683,304✔
809
    ref_type reserve_ref = to_ref(reserve_pos);
1,361,505✔
810
    ref_type free_positions_ref = reserve_ref;
1,361,505✔
811
    ref_type free_sizes_ref = free_positions_ref + free_positions_size;
1,361,505✔
812
    ref_type free_versions_ref = free_sizes_ref + free_sizes_size;
1,361,505✔
813
    ref_type top_ref = free_versions_ref + free_versions_size;
1,361,505✔
814

683,304✔
815
    // Update top to point to the calculated positions
683,304✔
816
    top.set(Group::s_free_pos_ndx, from_ref(free_positions_ref));               // Throws
1,361,505✔
817
    top.set(Group::s_free_size_ndx, from_ref(free_sizes_ref));                  // Throws
1,361,505✔
818
    top.set(Group::s_free_version_ndx, from_ref(free_versions_ref));            // Throws
1,361,505✔
819
    top.set(Group::s_version_ndx, RefOrTagged::make_tagged(m_current_version)); // Throws
1,361,505✔
820

683,304✔
821
    // Compacting files smaller than 1 Mb is not worth the effort. Arbitrary chosen value.
683,304✔
822
    static constexpr size_t minimal_compaction_size = 0x100000;
1,361,505✔
823
    if (m_logical_size >= minimal_compaction_size && m_evacuation_limit == 0 && m_backoff == 0) {
1,361,505✔
824
        // We might have allocated a bigger chunk than needed for the free lists, so if we
12,975✔
825
        // add what we have reserved and subtract what was requested, we get a better measure
12,975✔
826
        // for what will be free eventually. Also subtract the locked space as this is not
12,975✔
827
        // actually free.
12,975✔
828
        size_t free_space = m_free_space_size + reserve_size - max_free_space_needed - m_locked_space_size;
25,545✔
829
        REALM_ASSERT(m_logical_size > free_space);
25,545✔
830
        size_t used_space = m_logical_size - free_space;
25,545✔
831
        if (free_space > 2 * used_space) {
25,545✔
832
            // Clean up potential
45✔
833
            auto limit = util::round_up_to_page_size(used_space + used_space / 2);
114✔
834

45✔
835
            // If we make the file too small, there is a big chance it will grow immediately afterwards
45✔
836
            static constexpr size_t minimal_evac_limit = 0x10000;
114✔
837
            m_evacuation_limit = std::max(minimal_evac_limit, limit);
114✔
838

45✔
839
            // From now on, we will only allocate below this limit
45✔
840
            // Save the limit in the file
45✔
841
            while (top.size() <= Group::s_evacuation_point_ndx) {
144✔
842
                top.add(0);
30✔
843
            }
30✔
844
            top.set(Group::s_evacuation_point_ndx, RefOrTagged::make_tagged(m_evacuation_limit));
114✔
845
            if (auto logger = m_group.get_logger()) {
114✔
NEW
846
                logger->log(util::Logger::Level::detail, "Start compaction with limit %1", m_evacuation_limit);
×
NEW
847
            }
×
848
        }
114✔
849
    }
25,545✔
850

683,304✔
851
    // Get final sizes
683,304✔
852
    size_t top_byte_size = top.get_byte_size();
1,361,505✔
853
    ref_type end_ref = top_ref + top_byte_size;
1,361,505✔
854
    REALM_ASSERT_3(size_t(end_ref), <=, reserve_pos + max_free_space_needed);
1,361,505✔
855

683,304✔
856
    // Deduct the used space from the reserved chunk. Note that we have made
683,304✔
857
    // sure that the remaining size is never zero. Also, by the call to
683,304✔
858
    // m_free_positions.ensure_minimum_width() above, we have made sure that
683,304✔
859
    // m_free_positions has the capacity to store the new larger value without
683,304✔
860
    // reallocation.
683,304✔
861
    size_t rest = reserve_pos + reserve_size - size_t(end_ref);
1,361,505✔
862
    size_t used = size_t(end_ref) - reserve_pos;
1,361,505✔
863
    REALM_ASSERT_3(rest, >, 0);
1,361,505✔
864
    int_fast64_t value_8 = from_ref(end_ref);
1,361,505✔
865
    int_fast64_t value_9 = to_int64(rest);
1,361,505✔
866

683,304✔
867
    // value_9 is guaranteed to be smaller than the existing entry in the array and hence will not cause bit
683,304✔
868
    // expansion
683,304✔
869
    REALM_ASSERT_3(value_8, <=, Array::ubound_for_width(m_free_positions.get_width()));
1,361,505✔
870
    REALM_ASSERT_3(value_9, <=, Array::ubound_for_width(m_free_lengths.get_width()));
1,361,505✔
871

683,304✔
872
    m_free_positions.set(reserve_ndx, value_8); // Throws
1,361,505✔
873
    m_free_lengths.set(reserve_ndx, value_9);   // Throws
1,361,505✔
874
    m_free_space_size += rest;
1,361,505✔
875

683,304✔
876
#if REALM_ALLOC_DEBUG
877
    std::cout << "  Final Freelist:" << std::endl;
878
    for (size_t j = 0; j < m_free_positions.size(); ++j) {
879
        std::cout << "    [" << m_free_positions.get(j) << ".." << m_free_lengths.get(j);
880
        if (m_free_versions.size()) {
881
            std::cout << "]: " << m_free_versions.get(j);
882
        }
883
    }
884
    std::cout << std::endl << std::endl;
885
#endif
886

683,304✔
887
    // The free-list now have their final form, so we can write them to the file
683,304✔
888
    // char* start_addr = m_file_map.get_addr() + reserve_ref;
683,304✔
889
    if (m_alloc.is_in_memory()) {
1,361,505✔
890
        auto translator = in_memory_writer.get();
81,453✔
891
        write_array_at(translator, free_positions_ref, m_free_positions.get_header(), free_positions_size); // Throws
81,453✔
892
        write_array_at(translator, free_sizes_ref, m_free_lengths.get_header(), free_sizes_size);           // Throws
81,453✔
893
        write_array_at(translator, free_versions_ref, m_free_versions.get_header(), free_versions_size);    // Throws
81,453✔
894

40,728✔
895
        // Write top
40,728✔
896
        write_array_at(translator, top_ref, top.get_header(), top_byte_size); // Throws
81,453✔
897
    }
81,453✔
898
    else {
1,280,052✔
899
        MapWindow* window = get_window(reserve_ref, end_ref - reserve_ref);
1,280,052✔
900
        char* start_addr = window->translate(reserve_ref);
1,280,052✔
901
        window->encryption_read_barrier(start_addr, used);
1,280,052✔
902
        write_array_at(window, free_positions_ref, m_free_positions.get_header(), free_positions_size); // Throws
1,280,052✔
903
        write_array_at(window, free_sizes_ref, m_free_lengths.get_header(), free_sizes_size);           // Throws
1,280,052✔
904
        write_array_at(window, free_versions_ref, m_free_versions.get_header(), free_versions_size);    // Throws
1,280,052✔
905
        REALM_ASSERT_EX(
1,280,052✔
906
            free_positions_ref >= reserve_ref && free_positions_ref + free_positions_size <= reserve_ref + used,
1,280,052✔
907
            reserve_ref, reserve_ref + used, free_positions_ref, free_positions_ref + free_positions_size, top_ref);
1,280,052✔
908
        REALM_ASSERT_EX(free_sizes_ref >= reserve_ref && free_sizes_ref + free_sizes_size <= reserve_ref + used,
1,280,052✔
909
                        reserve_ref, reserve_ref + used, free_sizes_ref, free_sizes_ref + free_sizes_size, top_ref);
1,280,052✔
910
        REALM_ASSERT_EX(
1,280,052✔
911
            free_versions_ref >= reserve_ref && free_versions_ref + free_versions_size <= reserve_ref + used,
1,280,052✔
912
            reserve_ref, reserve_ref + used, free_versions_ref, free_versions_ref + free_versions_size, top_ref);
1,280,052✔
913

642,576✔
914

642,576✔
915
        // Write top
642,576✔
916
        write_array_at(window, top_ref, top.get_header(), top_byte_size); // Throws
1,280,052✔
917
        window->encryption_write_barrier(start_addr, used);
1,280,052✔
918
    }
1,280,052✔
919
    // Return top_ref so that it can be saved in lock file used for coordination
683,304✔
920
    return top_ref;
1,361,505✔
921
}
1,361,505✔
922

923

void GroupWriter::read_in_freelist()
{
    std::vector<FreeSpaceEntry> free_in_file;
    size_t evacuation_limit = m_evacuation_limit ? m_evacuation_limit : size_t(-1);
    REALM_ASSERT(m_free_lengths.is_attached());
    size_t limit = m_free_lengths.size();
    REALM_ASSERT_RELEASE_EX(m_free_positions.size() == limit, limit, m_free_positions.size());
    REALM_ASSERT_RELEASE_EX(m_free_versions.size() == limit, limit, m_free_versions.size());

    if (limit) {
        auto limit_version = m_oldest_reachable_version;
        for (size_t idx = 0; idx < limit; ++idx) {
            size_t ref = size_t(m_free_positions.get(idx));
            size_t size = size_t(m_free_lengths.get(idx));

            uint64_t version = m_free_versions.get(idx);
            // Entries that are freed in later, still-alive versions are not candidates for merge or allocation
            if (version > limit_version) {
                m_not_free_in_file.emplace_back(ref, size, version);
                continue;
            }
            if (ref + size > evacuation_limit) {
                if (ref < evacuation_limit) {
                    // Split entry
                    size_t still_free_size = evacuation_limit - ref;
                    m_under_evacuation.emplace_back(evacuation_limit, size - still_free_size, 0);
                    size = still_free_size;
                }
                else {
                    m_under_evacuation.emplace_back(ref, size, 0);
                    continue;
                }
            }

            free_in_file.emplace_back(ref, size, 0);
        }

        // This will imply a copy-on-write
        m_free_positions.clear();
        m_free_lengths.clear();
        m_free_versions.clear();
    }
    else {
        // We need to free the space occupied by the free lists
        // If the lists are empty, this has to be done explicitly
        // as clear would not copy-on-write an empty array.
        m_free_positions.copy_on_write();
        m_free_lengths.copy_on_write();
        m_free_versions.copy_on_write();
    }

#if REALM_ALLOC_DEBUG
    std::cout << "  Freelist (pinned): ";
    for (auto e : m_not_free_in_file) {
        std::cout << "[" << e.ref << ", " << e.size << "] <" << e.released_at_version << ">  ";
    }
    std::cout << std::endl;
#endif

    merge_adjacent_entries_in_freelist(m_under_evacuation);
    m_under_evacuation.erase(std::remove_if(m_under_evacuation.begin(), m_under_evacuation.end(),
                                            [](const auto& a) {
                                                return a.size == 0;
                                            }),
                             m_under_evacuation.end());
    merge_adjacent_entries_in_freelist(free_in_file);
    // Previous step produces - potentially - some entries with size of zero. These
    // entries will be skipped in the next step.
    move_free_in_file_to_size_map(free_in_file, m_size_map);
}

995
size_t GroupWriter::recreate_freelist(size_t reserve_pos)
996
{
1,361,499✔
997
    std::vector<FreeSpaceEntry> free_in_file;
1,361,499✔
998
    auto& new_free_space = m_group.m_alloc.get_free_read_only(); // Throws
1,361,499✔
999
    auto nb_elements =
1,361,499✔
1000
        m_size_map.size() + m_not_free_in_file.size() + m_under_evacuation.size() + new_free_space.size();
1,361,499✔
1001
    free_in_file.reserve(nb_elements);
1,361,499✔
1002

683,328✔
1003
    size_t reserve_ndx = realm::npos;
1,361,499✔
1004

683,328✔
1005
    for (const auto& entry : m_size_map) {
12,835,365✔
1006
        free_in_file.emplace_back(entry.second, entry.first, 0);
12,835,365✔
1007
    }
12,835,365✔
1008

683,328✔
1009
    {
1,361,499✔
1010
        size_t locked_space_size = 0;
1,361,499✔
1011
        for (const auto& locked : m_not_free_in_file) {
4,771,077✔
1012
            free_in_file.emplace_back(locked.ref, locked.size, locked.released_at_version);
4,771,077✔
1013
            locked_space_size += locked.size;
4,771,077✔
1014
        }
4,771,077✔
1015

683,328✔
1016
        for (const auto& free_space : new_free_space) {
9,718,230✔
1017
            free_in_file.emplace_back(free_space.first, free_space.second, m_current_version);
9,718,230✔
1018
            locked_space_size += free_space.second;
9,718,230✔
1019
        }
9,718,230✔
1020
        m_locked_space_size = locked_space_size;
1,361,499✔
1021
    }
1,361,499✔
1022

683,328✔
1023
    for (const auto& elem : m_under_evacuation) {
783,897✔
1024
        free_in_file.emplace_back(elem.ref, elem.size, 0);
195,942✔
1025
    }
195,942✔
1026

683,328✔
1027
    REALM_ASSERT(free_in_file.size() == nb_elements);
1,361,499✔
1028
    std::sort(begin(free_in_file), end(free_in_file), [](auto& a, auto& b) {
166,950,408✔
1029
        return a.ref < b.ref;
166,950,408✔
1030
    });
166,950,408✔
1031

683,328✔
1032
    {
1,361,499✔
1033
        // Copy into arrays while checking consistency
683,328✔
1034
        size_t prev_ref = 0;
1,361,499✔
1035
        size_t prev_size = 0;
1,361,499✔
1036
        size_t free_space_size = 0;
1,361,499✔
1037
        auto limit = free_in_file.size();
1,361,499✔
1038
        for (size_t i = 0; i < limit; ++i) {
28,847,271✔
1039
            const auto& free_space = free_in_file[i];
27,485,772✔
1040
            auto ref = free_space.ref;
27,485,772✔
1041
            if (REALM_UNLIKELY(prev_ref + prev_size > ref)) {
27,485,772✔
1042
                // Check if we are freeing arrays already in 'm_not_free_in_file'
1043
                for (const auto& elem : new_free_space) {
×
1044
                    ref_type free_ref = elem.first;
×
1045
                    size_t free_sz = elem.second;
×
1046
                    for (const auto& locked : m_not_free_in_file) {
×
1047
                        REALM_ASSERT_RELEASE_EX(free_ref < locked.ref || free_ref >= (locked.ref + locked.size),
×
1048
                                                locked.ref, locked.size, locked.released_at_version, free_ref,
×
1049
                                                m_current_version, m_alloc.get_file_path_for_assertions());
×
1050
                        REALM_ASSERT_RELEASE_EX(locked.ref < free_ref || locked.ref >= (free_ref + free_sz),
×
1051
                                                locked.ref, locked.released_at_version, free_ref, free_sz,
×
1052
                                                m_current_version, m_alloc.get_file_path_for_assertions());
×
1053
                    }
×
1054
                }
×
1055

1056
                REALM_ASSERT_RELEASE_EX(prev_ref + prev_size <= ref, prev_ref, prev_size, ref, i, limit,
×
1057
                                        m_alloc.get_file_path_for_assertions());
×
1058
            }
×
1059
            if (reserve_pos == ref) {
27,485,772✔
1060
                reserve_ndx = i;
1,361,517✔
1061
            }
1,361,517✔
1062
            else {
26,124,255✔
1063
                // The reserved chunk must not be counted now, since we don't know how much of it
13,054,224✔
1064
                // will eventually be used.
13,054,224✔
1065
                free_space_size += free_space.size;
26,124,255✔
1066
            }
26,124,255✔
1067
            m_free_positions.add(free_space.ref);
27,485,772✔
1068
            m_free_lengths.add(free_space.size);
27,485,772✔
1069
            m_free_versions.add(free_space.released_at_version);
27,485,772✔
1070
            prev_ref = free_space.ref;
27,485,772✔
1071
            prev_size = free_space.size;
27,485,772✔
1072
        }
27,485,772✔
1073
        REALM_ASSERT_RELEASE(reserve_ndx != realm::npos);
1,361,499✔
1074

683,328✔
1075
        m_free_space_size = free_space_size;
1,361,499✔
1076
    }
1,361,499✔
1077

683,328✔
1078
    return reserve_ndx;
1,361,499✔
1079
}
1,361,499✔
1080
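
// Illustrative sketch (not part of this file): recreate_freelist() gathers free-space
// entries from several sources, sorts them by file position and verifies that no two
// entries overlap before writing them back into the free-list arrays. A minimal, hedged
// version of that sort-and-verify step is shown below; `Entry` and `sort_and_verify`
// are hypothetical names introduced only for this example.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct Entry {
    std::size_t ref;  // position of the chunk in the file
    std::size_t size; // length of the chunk
};

inline void sort_and_verify(std::vector<Entry>& entries)
{
    std::sort(entries.begin(), entries.end(), [](const Entry& a, const Entry& b) {
        return a.ref < b.ref;
    });
    std::size_t prev_end = 0;
    for (const Entry& e : entries) {
        // mirrors the prev_ref/prev_size consistency check in recreate_freelist()
        assert(prev_end <= e.ref && "free-list entries must not overlap");
        prev_end = e.ref + e.size;
    }
}
// End of sketch.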

1081
void GroupWriter::merge_adjacent_entries_in_freelist(std::vector<GroupWriter::FreeSpaceEntry>& list)
1082
{
2,722,920✔
1083
    if (list.size() > 1) {
2,722,920✔
1084
        // Combine any adjacent chunks in the freelist
617,826✔
1085
        auto prev = list.begin();
1,227,801✔
1086
        auto end = list.end();
1,227,801✔
1087
        for (auto it = list.begin() + 1; it != end; ++it) {
22,007,139✔
1088
            REALM_ASSERT(it->ref > prev->ref);
20,779,338✔
1089
            if (prev->ref + prev->size == it->ref) {
20,779,338✔
1090
                prev->size += it->size;
2,593,914✔
1091
                it->size = 0;
2,593,914✔
1092
            }
2,593,914✔
1093
            else {
18,185,424✔
1094
                prev = it;
18,185,424✔
1095
            }
18,185,424✔
1096
        }
20,779,338✔
1097
    }
1,227,801✔
1098
}
2,722,920✔
1099
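
// Illustrative sketch (not part of this file): how merge_adjacent_entries_in_freelist()
// coalesces chunks in a ref-sorted list. A merged-away entry is left behind with size 0
// so that later steps can simply skip it. `Chunk` and `coalesce` are hypothetical names
// used only in this example.
#include <cstddef>
#include <vector>

struct Chunk {
    std::size_t ref;
    std::size_t size;
};

inline void coalesce(std::vector<Chunk>& list)
{
    if (list.size() < 2)
        return;
    auto prev = list.begin();
    for (auto it = list.begin() + 1; it != list.end(); ++it) {
        if (prev->ref + prev->size == it->ref) {
            prev->size += it->size; // grow the earlier chunk
            it->size = 0;           // mark this entry as absorbed
        }
        else {
            prev = it;
        }
    }
}

// Example: {0,8}, {8,16}, {32,8} becomes {0,24}, {8,0}, {32,8}.
// End of sketch.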

1100
void GroupWriter::move_free_in_file_to_size_map(const std::vector<GroupWriter::FreeSpaceEntry>& list,
1101
                                                std::multimap<size_t, size_t>& size_map)
1102
{
1,361,529✔
1103
    ALLOC_DBG_COUT("  Freelist (true free): ");
1,361,529✔
1104
    for (auto& elem : list) {
21,801,711✔
1105
        // Skip elements merged in 'merge_adjacent_entries_in_freelist'
10,928,796✔
1106
        if (elem.size) {
21,801,711✔
1107
            REALM_ASSERT_RELEASE_EX(!(elem.size & 7), elem.size);
19,280,340✔
1108
            REALM_ASSERT_RELEASE_EX(!(elem.ref & 7), elem.ref);
19,280,340✔
1109
            size_map.emplace(elem.size, elem.ref);
19,280,340✔
1110
            ALLOC_DBG_COUT("[" << elem.ref << ", " << elem.size << "] ");
19,280,340✔
1111
        }
19,280,340✔
1112
    }
21,801,711✔
1113
    ALLOC_DBG_COUT(std::endl);
1,361,529✔
1114
}
1,361,529✔
1115

1116
size_t GroupWriter::get_free_space(size_t size)
1117
{
23,202,327✔
1118
    REALM_ASSERT_3(size % 8, ==, 0); // 8-byte alignment
23,202,327✔
1119

11,539,614✔
1120
    auto p = reserve_free_space(size);
23,202,327✔
1121

11,539,614✔
1122
    // Claim space from identified chunk
11,539,614✔
1123
    size_t chunk_pos = p->second;
23,202,327✔
1124
    size_t chunk_size = p->first;
23,202,327✔
1125
    REALM_ASSERT_3(chunk_size, >=, size);
23,202,327✔
1126
    REALM_ASSERT_RELEASE_EX(!(chunk_pos & 7), chunk_pos);
23,202,327✔
1127
    REALM_ASSERT_RELEASE_EX(!(chunk_size & 7), chunk_size);
23,202,327✔
1128

11,539,614✔
1129
    size_t rest = chunk_size - size;
23,202,327✔
1130
    m_size_map.erase(p);
23,202,327✔
1131
    if (rest > 0) {
23,202,327✔
1132
        // Allocating part of a chunk: this always happens from the beginning
8,294,685✔
1133
        // of the chunk. The call to reserve_free_space may split chunks
8,294,685✔
1134
        // in order to make sure that it returns a chunk from which allocation
8,294,685✔
1135
        // can be done from the beginning
8,294,685✔
1136
        m_size_map.emplace(rest, chunk_pos + size);
16,677,096✔
1137
    }
16,677,096✔
1138
    return chunk_pos;
23,202,327✔
1139
}
23,202,327✔
1140

1141

1142
inline GroupWriter::FreeListElement GroupWriter::split_freelist_chunk(FreeListElement it, size_t alloc_pos)
1143
{
12✔
1144
    size_t start_pos = it->second;
12✔
1145
    size_t chunk_size = it->first;
12✔
1146
    m_size_map.erase(it);
12✔
1147
    REALM_ASSERT_RELEASE_EX(alloc_pos > start_pos, alloc_pos, start_pos);
12✔
1148

6✔
1149
    REALM_ASSERT_RELEASE_EX(!(alloc_pos & 7), alloc_pos);
12✔
1150
    size_t size_first = alloc_pos - start_pos;
12✔
1151
    size_t size_second = chunk_size - size_first;
12✔
1152
    m_size_map.emplace(size_first, start_pos);
12✔
1153
    return m_size_map.emplace(size_second, alloc_pos);
12✔
1154
}
12✔
1155

1156
GroupWriter::FreeListElement GroupWriter::search_free_space_in_free_list_element(FreeListElement it, size_t size)
1157
{
24,577,683✔
1158
    SlabAlloc& alloc = m_group.m_alloc;
24,577,683✔
1159
    size_t chunk_size = it->first;
24,577,683✔
1160

12,235,116✔
1161
    // search through the chunk, finding a place within it,
12,235,116✔
1162
    // where an allocation will not cross a mmap boundary
12,235,116✔
1163
    size_t start_pos = it->second;
24,577,683✔
1164
    size_t alloc_pos = alloc.find_section_in_range(start_pos, chunk_size, size);
24,577,683✔
1165
    if (alloc_pos == 0) {
24,577,683✔
1166
        return m_size_map.end();
51✔
1167
    }
51✔
1168
    // we found a place - if it's not at the beginning of the chunk,
12,235,086✔
1169
    // we split the chunk so that the allocation can be done from the
12,235,086✔
1170
    // beginning of the second chunk.
12,235,086✔
1171
    if (alloc_pos != start_pos) {
24,577,632✔
1172
        it = split_freelist_chunk(it, alloc_pos);
12✔
1173
    }
12✔
1174
    // Match found!
12,235,086✔
1175
    ALLOC_DBG_COUT("    alloc [" << alloc_pos << ", " << size << "]" << std::endl);
24,577,632✔
1176
    return it;
24,577,632✔
1177
}
24,577,632✔
1178

1179
GroupWriter::FreeListElement GroupWriter::search_free_space_in_part_of_freelist(size_t size)
1180
{
24,564,171✔
1181
    auto it = m_size_map.lower_bound(size);
24,564,171✔
1182
    while (it != m_size_map.end()) {
29,874,999✔
1183
        // Accept either a perfect match or a block that is at least twice the size. Tests have shown
14,876,325✔
1184
        // that this is a good strategy.
14,876,325✔
1185
        if (it->first == size || it->first >= 2 * size) {
29,787,309✔
1186
            auto ret = search_free_space_in_free_list_element(it, size);
24,477,561✔
1187
            if (ret != m_size_map.end()) {
24,477,975✔
1188
                return ret;
24,476,481✔
1189
            }
24,476,481✔
1190
            ++it;
2,147,485,141✔
1191
        }
2,147,485,141✔
1192
        else {
5,309,748✔
1193
            // The block was too small to qualify (less than twice the size); jump to the first one that is at least twice as big.
2,699,634✔
1194
            it = m_size_map.lower_bound(2 * size);
5,309,748✔
1195
        }
5,309,748✔
1196
    }
29,787,309✔
1197
    // No match
12,222,555✔
1198
    return m_size_map.end();
12,262,887✔
1199
}
24,564,171✔
1200
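
// Illustrative sketch (not part of this file): the "exact fit or at least twice the
// requested size" search strategy used above, reduced to a lookup in a size-keyed
// multimap. It omits the mmap-boundary check done by search_free_space_in_free_list_element().
// `SizeMap` and `find_chunk` are hypothetical names used only in this example.
#include <cstddef>
#include <map>

using SizeMap = std::multimap<std::size_t, std::size_t>; // chunk size -> file position

inline SizeMap::iterator find_chunk(SizeMap& size_map, std::size_t size)
{
    auto it = size_map.lower_bound(size);
    while (it != size_map.end()) {
        if (it->first == size || it->first >= 2 * size)
            return it; // perfect fit, or big enough to leave a useful remainder
        // The chunk is bigger than requested but less than twice as big:
        // jump to the first chunk that is at least twice as big.
        it = size_map.lower_bound(2 * size);
    }
    return size_map.end(); // no match; the caller extends the file or gives up evacuation
}
// End of sketch.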

1201

1202
GroupWriter::FreeListElement GroupWriter::reserve_free_space(size_t size)
1203
{
24,564,105✔
1204
    auto chunk = search_free_space_in_part_of_freelist(size);
24,564,105✔
1205
    while (chunk == m_size_map.end()) {
24,663,495✔
1206
        if (!m_under_evacuation.empty()) {
99,390✔
1207
            // We have been too aggressive in setting the evacuation limit,
33✔
1208
            // so give up on compaction for now. But first, release all the
33✔
1209
            // elements that were kept back.
33✔
1210
            for (auto& elem : m_under_evacuation) {
2,511✔
1211
                m_size_map.emplace(elem.size, elem.ref);
2,511✔
1212
            }
2,511✔
1213
            m_under_evacuation.clear();
90✔
1214
            m_evacuation_limit = 0;
90✔
1215
            m_backoff = 10;
90✔
1216
            if (auto logger = m_group.get_logger()) {
90✔
NEW
1217
                logger->log(util::Logger::Level::detail, "Give up compaction");
×
NEW
1218
            }
×
1219
            chunk = search_free_space_in_part_of_freelist(size);
90✔
1220
        }
90✔
1221
        else {
99,300✔
1222
            // No free space, so we have to extend the file.
57,999✔
1223
            auto new_chunk = extend_free_space(size);
99,300✔
1224
            chunk = search_free_space_in_free_list_element(new_chunk, size);
99,300✔
1225
        }
99,300✔
1226
    }
99,390✔
1227
    return chunk;
24,564,105✔
1228
}
24,564,105✔
1229

1230
// Extend the free space with at least the requested size.
1231
// Due to mmap constraints, the extension cannot be guaranteed to
1232
// allow an allocation of the requested size, so multiple calls to
1233
// extend_free_space may be needed before an allocation can succeed.
1234
GroupWriter::FreeListElement GroupWriter::extend_free_space(size_t requested_size)
1235
{
99,300✔
1236
    // We need to consider the "logical" size of the file here, and not the real
57,999✔
1237
    // size. The real size may have changed without the free space information
57,999✔
1238
    // having been adjusted accordingly. This can happen, for example, if
57,999✔
1239
    // write_group() fails before writing the new top-ref, but after having
57,999✔
1240
    // extended the file size. It can also happen as part of initial file expansion
57,999✔
1241
    // during attach_file().
57,999✔
1242
    size_t logical_file_size = to_size_t(m_group.m_top.get(2) / 2);
99,300✔
1243
    // find minimal new size according to the following growth ratios:
57,999✔
1244
    // at least 100% (doubling) until we reach 1 MB, then just grow by 1 MB at a time
57,999✔
1245
    uint64_t minimal_new_size = logical_file_size;
99,300✔
1246
    constexpr uint64_t growth_boundary = 1024 * 1024; // 1MB
99,300✔
1247
    if (minimal_new_size < growth_boundary) {
99,300✔
1248
        minimal_new_size *= 2;
94,125✔
1249
    }
94,125✔
1250
    else {
5,175✔
1251
        minimal_new_size += growth_boundary;
5,175✔
1252
    }
5,175✔
1253
    // grow with at least the growth ratio, but if more is required, grow more
57,999✔
1254
    uint64_t required_new_size = logical_file_size + requested_size;
99,300✔
1255
    if (required_new_size > minimal_new_size) {
99,300✔
1256
        minimal_new_size = required_new_size;
66,087✔
1257
    }
66,087✔
1258
    // Ensure that minimal_new_size is less than 3 GB on a 32-bit device
57,999✔
1259
    if (minimal_new_size > (std::numeric_limits<size_t>::max() / 4 * 3)) {
99,300✔
1260
        throw MaximumFileSizeExceeded("GroupWriter cannot extend free space: " + util::to_string(logical_file_size) +
×
1261
                                      " + " + util::to_string(requested_size));
×
1262
    }
×
1263

57,999✔
1264
    // We now know that the size can safely be assigned to a size_t
57,999✔
1265
    // and we know that the following adjustments are safe to perform
57,999✔
1266
    size_t new_file_size = static_cast<size_t>(minimal_new_size);
99,300✔
1267

57,999✔
1268
    // align to page size, but do not cross a section boundary
57,999✔
1269
    size_t next_boundary = m_alloc.align_size_to_section_boundary(new_file_size);
99,300✔
1270
    new_file_size = util::round_up_to_page_size(new_file_size);
99,300✔
1271
    if (new_file_size > next_boundary) {
99,300✔
1272
        // we cannot cross a section boundary. In this case the allocation will
1273
        // likely fail and be retried, and we will then allocate anew from the next section
1274
        new_file_size = next_boundary;
×
1275
    }
×
1276
    // The size must be a multiple of 8. This is guaranteed as long as
57,999✔
1277
    // the initial size is a multiple of 8.
57,999✔
1278
    REALM_ASSERT_RELEASE_EX(!(new_file_size & 7), new_file_size);
99,300✔
1279
    REALM_ASSERT_3(logical_file_size, <, new_file_size);
99,300✔
1280

57,999✔
1281
    // Note: resize_file() will call File::prealloc() which may misbehave under
57,999✔
1282
    // race conditions (see documentation of File::prealloc()). Fortunately, no
57,999✔
1283
    // race conditions can occur, because in transactional mode we hold a write
57,999✔
1284
    // lock at this time, and in non-transactional mode it is the responsibility
57,999✔
1285
    // of the user to ensure non-concurrent file mutation.
57,999✔
1286
    m_alloc.resize_file(new_file_size); // Throws
99,300✔
1287
    REALM_ASSERT(new_file_size <= get_file_size());
99,300✔
1288
    ALLOC_DBG_COUT("        ** File extension to " << new_file_size << "     after request for " << requested_size
99,300✔
1289
                                                   << std::endl);
99,300✔
1290

57,999✔
1291
    // new_file_size is larger than logical_file_size but known to be
57,999✔
1292
    // representable in a size_t, so the difference below is as well:
57,999✔
1293
    size_t chunk_size = new_file_size - logical_file_size;
99,300✔
1294
    REALM_ASSERT_RELEASE_EX(!(chunk_size & 7), chunk_size);
99,300✔
1295
    REALM_ASSERT_RELEASE(chunk_size != 0);
99,300✔
1296
    auto it = m_size_map.emplace(chunk_size, logical_file_size);
99,300✔
1297

57,999✔
1298
    // Update the logical file size
57,999✔
1299
    m_logical_size = new_file_size;
99,300✔
1300
    m_group.m_top.set(Group::s_file_size_ndx, RefOrTagged::make_tagged(m_logical_size));
99,300✔
1301

57,999✔
1302
    // std::cout << "New file size = " << std::hex << m_logical_size << std::dec << std::endl;
57,999✔
1303

57,999✔
1304
    return it;
99,300✔
1305
}
99,300✔
1306
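
// Illustrative sketch (not part of this file): the growth policy applied by
// extend_free_space(), reduced to plain arithmetic. The logical size doubles until it
// reaches 1 MB, afterwards it grows by 1 MB at a time, and it always grows at least
// enough for the current request. The fixed page size and the plain page rounding are
// simplifying assumptions; the real code also respects mmap section boundaries.
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>

inline std::size_t grow_file_size(std::size_t logical_size, std::size_t requested,
                                  std::size_t page_size = 4096)
{
    constexpr std::uint64_t growth_boundary = 1024 * 1024; // 1 MB
    std::uint64_t new_size = logical_size;
    new_size += (new_size < growth_boundary) ? new_size : growth_boundary;
    std::uint64_t required = std::uint64_t(logical_size) + requested;
    if (required > new_size)
        new_size = required;
    if (new_size > std::uint64_t(std::numeric_limits<std::size_t>::max() / 4 * 3))
        throw std::runtime_error("maximum file size exceeded");
    // round up to whole pages (simplified; no section-boundary handling here)
    return std::size_t((new_size + page_size - 1) / page_size * page_size);
}
// End of sketch.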

1307
bool inline is_aligned(char* addr)
1308
{
23,662,551✔
1309
    size_t as_binary = reinterpret_cast<size_t>(addr);
23,662,551✔
1310
    return (as_binary & 7) == 0;
23,662,551✔
1311
}
23,662,551✔
1312

1313
ref_type GroupWriter::write_array(const char* data, size_t size, uint32_t checksum)
1314
{
18,213,825✔
1315
    // Get position of free space to write in (expanding file if needed)
9,047,931✔
1316
    size_t pos = get_free_space(size);
18,213,825✔
1317

9,047,931✔
1318
    // Write the block
9,047,931✔
1319
    MapWindow* window = get_window(pos, size);
18,213,825✔
1320
    char* dest_addr = window->translate(pos);
18,213,825✔
1321
    REALM_ASSERT_RELEASE(is_aligned(dest_addr));
18,213,825✔
1322
    window->encryption_read_barrier(dest_addr, size);
18,213,825✔
1323
    memcpy(dest_addr, &checksum, 4);
18,213,825✔
1324
    memcpy(dest_addr + 4, data + 4, size - 4);
18,213,825✔
1325
    window->encryption_write_barrier(dest_addr, size);
18,213,825✔
1326
    // return ref of the written array
9,047,931✔
1327
    ref_type ref = to_ref(pos);
18,213,825✔
1328
    return ref;
18,213,825✔
1329
}
18,213,825✔
1330
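
// Illustrative sketch (not part of this file): the block layout produced by write_array().
// The first 4 bytes of the destination receive the checksum; the rest of the array payload
// is copied unchanged. The destination must be 8-byte aligned. `write_block` is a
// hypothetical name used only in this example.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

inline void write_block(char* dest, const char* data, std::size_t size, std::uint32_t checksum)
{
    assert((reinterpret_cast<std::uintptr_t>(dest) & 7) == 0); // 8-byte alignment
    std::memcpy(dest, &checksum, 4);           // checksum replaces the first 4 bytes
    std::memcpy(dest + 4, data + 4, size - 4); // remaining payload copied verbatim
}
// End of sketch.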

1331
template <class T>
1332
void GroupWriter::write_array_at(T* translator, ref_type ref, const char* data, size_t size)
1333
{
5,445,666✔
1334
    size_t pos = size_t(ref);
5,445,666✔
1335

2,733,054✔
1336
    REALM_ASSERT_3(pos + size, <=, to_size_t(m_group.m_top.get(2) / 2));
5,445,666✔
1337
    // REALM_ASSERT_3(pos + size, <=, m_file_map.get_size());
2,733,054✔
1338
    char* dest_addr = translator->translate(pos);
5,445,666✔
1339
    REALM_ASSERT_RELEASE(is_aligned(dest_addr));
5,445,666✔
1340

2,733,054✔
1341
    uint32_t dummy_checksum = 0x41414141UL; // "AAAA" in ASCII
5,445,666✔
1342
    memcpy(dest_addr, &dummy_checksum, 4);
5,445,666✔
1343
    memcpy(dest_addr + 4, data + 4, size - 4);
5,445,666✔
1344
}
5,445,666✔
1345

1346

1347
void GroupWriter::commit(ref_type new_top_ref)
1348
{
1,193,877✔
1349
    using _impl::SimulatedFailure;
1,193,877✔
1350
    SimulatedFailure::trigger(SimulatedFailure::group_writer__commit); // Throws
1,193,877✔
1351

599,514✔
1352
    MapWindow* window = get_window(0, sizeof(SlabAlloc::Header));
1,193,877✔
1353
    SlabAlloc::Header& file_header = *reinterpret_cast<SlabAlloc::Header*>(window->translate(0));
1,193,877✔
1354
    window->encryption_read_barrier(&file_header, sizeof file_header);
1,193,877✔
1355

599,514✔
1356
    // One bit of the flags field selects which of the two top ref slots is in
599,514✔
1357
    // use (same for file format version slots). The current value of the bit
599,514✔
1358
    // reflects the currently bound snapshot, so we need to invert it for the
599,514✔
1359
    // new snapshot. Other bits must remain unchanged.
599,514✔
1360
    unsigned old_flags = file_header.m_flags;
1,193,877✔
1361
    unsigned new_flags = old_flags ^ SlabAlloc::flags_SelectBit;
1,193,877✔
1362
    int slot_selector = ((new_flags & SlabAlloc::flags_SelectBit) != 0 ? 1 : 0);
901,221✔
1363

599,514✔
1364
    // Update top ref and file format version
599,514✔
1365
    int file_format_version = m_group.get_file_format_version();
1,193,877✔
1366
    using type_1 = std::remove_reference<decltype(file_header.m_file_format[0])>::type;
1,193,877✔
1367
    REALM_ASSERT(!util::int_cast_has_overflow<type_1>(file_format_version));
1,193,877✔
1368
    // only write the file format field if necessary (optimization)
599,514✔
1369
    if (type_1(file_format_version) != file_header.m_file_format[slot_selector]) {
1,193,877✔
1370
        // write barrier on the entire `file_header` happens below
25,920✔
1371
        file_header.m_file_format[slot_selector] = type_1(file_format_version);
53,718✔
1372
    }
53,718✔
1373

599,514✔
1374
    // When running the test suite, device synchronization is disabled
599,514✔
1375
    bool disable_sync = get_disable_sync_to_disk() || m_durability == Durability::Unsafe;
1,193,877✔
1376
    file_header.m_top_ref[slot_selector] = new_top_ref;
1,193,877✔
1377

599,514✔
1378
#if REALM_METRICS
1,193,877✔
1379
    std::unique_ptr<MetricTimer> fsync_timer = Metrics::report_fsync_time(m_group);
1,193,877✔
1380
#endif // REALM_METRICS
1,193,877✔
1381

599,514✔
1382
    // Make sure that all data relating to the new snapshot is written to
599,514✔
1383
    // stable storage before flipping the slot selector
599,514✔
1384
    window->encryption_write_barrier(&file_header, sizeof file_header);
1,193,877✔
1385
    flush_all_mappings();
1,193,877✔
1386
    if (!disable_sync) {
1,193,877✔
1387
        sync_all_mappings();
225✔
1388
        m_alloc.get_file().barrier();
225✔
1389
    }
225✔
1390

599,514✔
1391
    // Flip the slot selector bit.
599,514✔
1392
    window->encryption_read_barrier(&file_header, sizeof file_header);
1,193,877✔
1393
    using type_2 = std::remove_reference<decltype(file_header.m_flags)>::type;
1,193,877✔
1394
    file_header.m_flags = type_2(new_flags);
1,193,877✔
1395

599,514✔
1396
    // Write new selector to disk
599,514✔
1397
    window->encryption_write_barrier(&file_header.m_flags, sizeof(file_header.m_flags));
1,193,877✔
1398
    window->flush();
1,193,877✔
1399
    if (!disable_sync) {
1,193,877✔
1400
        window->sync();
225✔
1401
        m_alloc.get_file().barrier();
225✔
1402
    }
225✔
1403
}
1,193,877✔
1404
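
// Illustrative sketch (not part of this file): the double-buffered top-ref commit that
// commit() performs. All data belonging to the new snapshot is flushed first; only then
// is the slot-selector bit flipped and flushed, so a crash at any point leaves one valid
// top ref in the header. `Header` and `commit_top_ref` are hypothetical stand-ins for
// the real SlabAlloc::Header and the flush/sync machinery.
#include <cstdint>

struct Header {
    std::uint64_t top_ref[2];
    std::uint8_t file_format[2];
    std::uint8_t flags; // bit 0 selects the active slot
};

inline void commit_top_ref(Header& hdr, std::uint64_t new_top_ref)
{
    unsigned new_flags = hdr.flags ^ 1u;  // select the currently inactive slot
    int slot = (new_flags & 1u) ? 1 : 0;
    hdr.top_ref[slot] = new_top_ref;      // write the new top ref into that slot
    // <flush and fsync every mapping holding data for the new snapshot here>
    hdr.flags = std::uint8_t(new_flags);  // publish the snapshot by flipping the selector
    // <flush and fsync the header here>
}
// End of sketch.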

1405

1406
#ifdef REALM_DEBUG
1407

1408
void GroupWriter::dump()
1409
{
×
1410
    size_t count = m_free_lengths.size();
×
1411
    std::cout << "count: " << count << ", m_size = " << m_alloc.get_file_size() << ", "
×
1412
              << "version >= " << m_oldest_reachable_version << "\n";
×
1413
    for (size_t i = 0; i < count; ++i) {
×
1414
        std::cout << i << ": " << m_free_positions.get(i) << ", " << m_free_lengths.get(i) << " - "
×
1415
                  << m_free_versions.get(i) << "\n";
×
1416
    }
×
1417
}
×
1418

1419
#endif