realm / realm-core / github_pull_request_281750

30 Oct 2023 03:37PM UTC coverage: 90.528% (-1.0%) from 91.571%

Pull Request #6073: Merge next-major (build: Evergreen; author: jedelbo)
Log free space and history sizes when opening file

95488 of 175952 branches covered (54.27%)
8973 of 12277 new or added lines in 149 files covered (73.09%)
622 existing lines in 51 files now uncovered
233503 of 257934 relevant lines covered (90.53%)
6533720.56 hits per line

Source file: /src/realm/group_writer.cpp (95.91% covered)

/*************************************************************************
 *
 * Copyright 2016 Realm Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **************************************************************************/

#include <algorithm>
#include <set>

#include <iostream>

#include <realm/group_writer.hpp>

#include <realm/alloc_slab.hpp>
#include <realm/transaction.hpp>
#include <realm/disable_sync_to_disk.hpp>
#include <realm/impl/destroy_guard.hpp>
#include <realm/impl/simulated_failure.hpp>
#include <realm/util/miscellaneous.hpp>
#include <realm/util/safe_int_ops.hpp>

using namespace realm;
using namespace realm::util;

namespace realm {
class InMemoryWriter : public _impl::ArrayWriterBase {
public:
    InMemoryWriter(GroupWriter& owner)
        : m_owner(owner)
        , m_alloc(owner.m_alloc)
    {
    }
    ref_type write_array(const char* data, size_t size, uint32_t checksum) override
    {
        size_t pos = m_owner.get_free_space(size);

        // Write the block
        char* dest_addr = translate(pos);
        REALM_ASSERT_RELEASE(dest_addr && (reinterpret_cast<size_t>(dest_addr) & 7) == 0);
        memcpy(dest_addr, &checksum, 4);
        memcpy(dest_addr + 4, data + 4, size - 4);
        // return ref of the written array
        ref_type ref = to_ref(pos);
        return ref;
    }
    char* translate(ref_type ref)
    {
        return m_alloc.translate_memory_pos(ref);
    }

private:
    GroupWriter& m_owner;
    SlabAlloc& m_alloc;
};
} // namespace realm
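
// Layout note for InMemoryWriter::write_array(): the first four bytes of the
// written block hold the checksum, and the payload is copied from data + 4
// onward, so bytes 0-3 of the source array are replaced by the checksum.
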
// Class controlling a memory mapped window into a file
class WriteWindowMgr::MapWindow {
public:
    MapWindow(size_t alignment, util::File& f, ref_type start_ref, size_t initial_size,
              util::WriteMarker* write_marker = nullptr);
    ~MapWindow();

    // translate a ref to a pointer
    // inside the window defined during construction.
    char* translate(ref_type ref);
    void encryption_read_barrier(void* start_addr, size_t size);
    void encryption_write_barrier(void* start_addr, size_t size);
    // flush from private to shared cache
    void flush();
    // sync to disk (including flush as needed)
    void sync();
    // return true if the specified range is fully visible through
    // the MapWindow
    bool matches(ref_type start_ref, size_t size);
    // return false if the mapping cannot be extended to hold the
    // requested size - extends if possible and then returns true
    bool extends_to_match(util::File& f, ref_type start_ref, size_t size);

private:
    util::File::Map<char> m_map;
    ref_type m_base_ref;
    ref_type aligned_to_mmap_block(ref_type start_ref);
    size_t get_window_size(util::File& f, ref_type start_ref, size_t size);
    size_t m_alignment;
};

// True if a requested block falls within a memory mapping.
bool WriteWindowMgr::MapWindow::matches(ref_type start_ref, size_t size)
{
    if (start_ref < m_base_ref)
        return false;
    if (start_ref + size > m_base_ref + m_map.get_size())
        return false;
    return true;
}

// When determining which part of the file to mmap, We try to pick a 1MB window containing
112
// the requested block. We align windows on 1MB boundaries. We also align window size at
113
// 1MB, except in cases where the referenced part of the file straddles a 1MB boundary.
114
// In that case we choose a larger window.
115
//
116
// In cases where a 1MB window would stretch beyond the end of the file, we choose
117
// a smaller window. Anything mapped after the end of file would be undefined anyways.
118
ref_type WriteWindowMgr::MapWindow::aligned_to_mmap_block(ref_type start_ref)
119
{
2,498,211✔
120
    // align to 1MB boundary
1,268,802✔
121
    size_t page_mask = m_alignment - 1;
2,498,211✔
122
    return start_ref & ~page_mask;
2,498,211✔
123
}
2,498,211✔
124
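
// Example, assuming m_alignment == 1 MB (0x100000): a start_ref of 0x1A2B3C is
// aligned down to 0x100000, and a start_ref of 0x0FFFFF is aligned down to 0x0.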

size_t WriteWindowMgr::MapWindow::get_window_size(util::File& f, ref_type start_ref, size_t size)
{
    size_t window_size = start_ref + size - m_base_ref;
    // always map at least to match alignment
    if (window_size < m_alignment)
        window_size = m_alignment;
    // but never map beyond end of file
    size_t file_size = to_size_t(f.get_size());
    REALM_ASSERT_DEBUG_EX(start_ref + size <= file_size, start_ref + size, file_size);
    if (window_size > file_size - m_base_ref)
        window_size = file_size - m_base_ref;
    return window_size;
}

// The file may grow in increments much smaller than 1MB. This can lead to a stream of requests
// which are each just beyond the end of the last mapping we made. It is important to extend the
// existing window to cover the new request (if possible) as opposed to adding a new window.
// The reason is not obvious: open windows need to be sync'ed to disk at the end of the commit,
// and we really want to use as few calls to msync() as possible.
//
// extends_to_match() will extend an existing mapping to accommodate a new request if possible
// and return true. If the request falls in a different 1MB window, it'll return false.
bool WriteWindowMgr::MapWindow::extends_to_match(util::File& f, ref_type start_ref, size_t size)
{
    size_t aligned_ref = aligned_to_mmap_block(start_ref);
    if (aligned_ref != m_base_ref)
        return false;
    size_t window_size = get_window_size(f, start_ref, size);
    m_map.sync();
    m_map.unmap();
    m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref);
    return true;
}

WriteWindowMgr::MapWindow::MapWindow(size_t alignment, util::File& f, ref_type start_ref, size_t size,
                                     util::WriteMarker* write_marker)
    : m_alignment(alignment)
{
    m_base_ref = aligned_to_mmap_block(start_ref);
    size_t window_size = get_window_size(f, start_ref, size);
    m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref);
#if REALM_ENABLE_ENCRYPTION
    if (auto p = m_map.get_encrypted_mapping())
        p->set_marker(write_marker);
#else
    static_cast<void>(write_marker);
#endif
}

WriteWindowMgr::MapWindow::~MapWindow()
{
    m_map.sync();
    m_map.unmap();
}

void WriteWindowMgr::MapWindow::flush()
{
    m_map.flush();
}

void WriteWindowMgr::MapWindow::sync()
{
    flush();
    m_map.sync();
}

char* WriteWindowMgr::MapWindow::translate(ref_type ref)
{
    return m_map.get_addr() + (ref - m_base_ref);
}

void WriteWindowMgr::MapWindow::encryption_read_barrier(void* start_addr, size_t size)
{
    realm::util::encryption_read_barrier_for_write(start_addr, size, m_map.get_encrypted_mapping());
}

void WriteWindowMgr::MapWindow::encryption_write_barrier(void* start_addr, size_t size)
{
    realm::util::encryption_write_barrier(start_addr, size, m_map.get_encrypted_mapping());
}

WriteWindowMgr::WriteWindowMgr(SlabAlloc& alloc, Durability dura, WriteMarker* write_marker)
    : m_alloc(alloc)
    , m_durability(dura)
    , m_write_marker(write_marker)
{
    m_map_windows.reserve(num_map_windows);
#if REALM_PLATFORM_APPLE && REALM_MOBILE
    m_window_alignment = 1 * 1024 * 1024; // 1M
#else
    if (sizeof(int*) == 4) {                  // 32 bit address space
        m_window_alignment = 1 * 1024 * 1024; // 1M
    }
    else {
        // large address space - just choose a size so that we have a single window
        size_t total_size = m_alloc.get_total_size();
        size_t wanted_size = 1;
        while (total_size) {
            total_size >>= 1;
            wanted_size <<= 1;
        }
        if (wanted_size < 1 * 1024 * 1024)
            wanted_size = 1 * 1024 * 1024; // minimum 1M
        m_window_alignment = wanted_size;
    }
#endif
}
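
// A note on the loop above: wanted_size ends up as the first power of two
// strictly greater than total_size (e.g. a 5 MB file gives an 8 MB window
// alignment), so on 64-bit platforms the whole file fits in a single window.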

GroupCommitter::GroupCommitter(Transaction& group, Durability dura, WriteMarker* write_marker)
    : m_group(group)
    , m_alloc(group.m_alloc)
    , m_durability(dura)
    , m_window_mgr(group.m_alloc, dura, write_marker)
{
}

GroupCommitter::~GroupCommitter() = default;

GroupWriter::GroupWriter(Transaction& group, Durability dura, WriteMarker* write_marker)
    : m_group(group)
    , m_alloc(group.m_alloc)
    , m_durability(dura)
    , m_window_mgr(group.m_alloc, dura, write_marker)
    , m_free_positions(m_alloc)
    , m_free_lengths(m_alloc)
    , m_free_versions(m_alloc)
{
    Array& top = m_group.m_top;
    m_logical_size = size_t(top.get_as_ref_or_tagged(Group::s_file_size_ndx).get_as_int());

    // When we make a commit, we will at least need room for the version
    while (top.size() <= Group::s_version_ndx) {
        top.add(0); // Throws
    }

    m_free_positions.set_parent(&top, Group::s_free_pos_ndx);
    m_free_lengths.set_parent(&top, Group::s_free_size_ndx);
    m_free_versions.set_parent(&top, Group::s_free_version_ndx);

    ref_type free_positions_ref = m_free_positions.get_ref_from_parent();
    if (free_positions_ref) {
        m_free_positions.init_from_ref(free_positions_ref);
    }
    else {
        m_free_positions.create(Array::type_Normal); // Throws
        _impl::DestroyGuard<Array> dg(&m_free_positions);
        m_free_positions.update_parent(); // Throws
        dg.release();
    }

    if (ref_type ref = m_free_lengths.get_ref_from_parent()) {
        m_free_lengths.init_from_ref(ref);
        REALM_ASSERT_RELEASE_EX(m_free_positions.size() == m_free_lengths.size(), top.get_ref(),
                                m_free_positions.size(), m_free_lengths.size());
    }
    else {
        m_free_lengths.create(Array::type_Normal); // Throws
        _impl::DestroyGuard<Array> dg(&m_free_lengths);
        m_free_lengths.update_parent(); // Throws
        dg.release();
    }

    DB::version_type initial_version = 0;

    if (ref_type ref = m_free_versions.get_ref_from_parent()) {
        m_free_versions.init_from_ref(ref);
        REALM_ASSERT_RELEASE_EX(m_free_versions.size() == m_free_lengths.size(), top.get_ref(),
                                m_free_versions.size(), m_free_lengths.size());
    }
    else {
        int_fast64_t value = int_fast64_t(initial_version);
        top.set(6, 1 + 2 * uint64_t(initial_version)); // Throws
        size_t n = m_free_positions.size();
        bool context_flag = false;
        m_free_versions.create(Array::type_Normal, context_flag, n, value); // Throws
        _impl::DestroyGuard<Array> dg(&m_free_versions);
        m_free_versions.update_parent(); // Throws
        dg.release();
    }
    m_evacuation_limit = 0;
    m_backoff = 0;
}

void GroupWriter::sync_according_to_durability()
{
    switch (m_durability) {
        case Durability::Full:
        case Durability::Unsafe:
            m_window_mgr.sync_all_mappings();
            break;
        case Durability::MemOnly:
            m_window_mgr.flush_all_mappings();
    }
}

GroupWriter::~GroupWriter() = default;

size_t GroupWriter::get_file_size() const noexcept
{
    auto sz = to_size_t(m_alloc.get_file_size());
    return sz;
}

void WriteWindowMgr::flush_all_mappings()
{
    for (const auto& window : m_map_windows) {
        window->flush();
    }
}

void WriteWindowMgr::sync_all_mappings()
{
    if (m_durability == Durability::Unsafe)
        return;
    for (const auto& window : m_map_windows) {
        window->sync();
    }
}

// Get a window matching a request, either creating a new window or reusing an
// existing one (possibly extended to accommodate the new request). Maintain a
// cache of open windows which are sync'ed and closed following a least recently
// used policy. Entries in the cache are kept in MRU order.
WriteWindowMgr::MapWindow* WriteWindowMgr::get_window(ref_type start_ref, size_t size)
{
    auto match = std::find_if(m_map_windows.begin(), m_map_windows.end(), [&](const auto& window) {
        return window->matches(start_ref, size) || window->extends_to_match(m_alloc.get_file(), start_ref, size);
    });
    if (match != m_map_windows.end()) {
        // move matching window to top (to keep MRU order)
        std::rotate(m_map_windows.begin(), match, match + 1);
        return m_map_windows[0].get();
    }
    // no window found, make room for a new one at the top
    if (m_map_windows.size() == num_map_windows) {
        m_map_windows.back()->flush();
        m_map_windows.pop_back();
    }
    auto new_window =
        std::make_unique<MapWindow>(m_window_alignment, m_alloc.get_file(), start_ref, size, m_write_marker);
    m_map_windows.insert(m_map_windows.begin(), std::move(new_window));
    return m_map_windows[0].get();
}
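
// Cache discipline, as implemented above: the vector is kept most-recently-used
// first. std::rotate moves a hit to the front, and when the cache is full the
// window at the back (the least recently used) is flushed and evicted.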

#define REALM_ALLOC_DEBUG 0
#if REALM_ALLOC_DEBUG
#define ALLOC_DBG_COUT(args)                                                                                         \
    {                                                                                                                \
        std::cout << args;                                                                                           \
    }
#else
#define ALLOC_DBG_COUT(args)
#endif

#ifdef REALM_DEBUG
void GroupWriter::map_reachable()
{
    class Collector : public Array::MemUsageHandler {
    public:
        Collector(std::vector<Reachable>& reachable)
            : m_reachable(reachable)
        {
        }
        void handle(ref_type ref, size_t, size_t used) override
        {
            m_reachable.emplace_back(Reachable{ref, used});
        }
        std::vector<Reachable>& m_reachable;
    };
    // collect reachable blocks in all reachable versions
    for (auto& [version, info] : m_top_ref_map) {
        Collector collector(info.reachable_blocks);
        // skip any empty entries
        if (info.top_ref == 0)
            continue;
        Array array(m_alloc);
        array.init_from_ref(info.top_ref);
        array.report_memory_usage(collector);
        std::sort(info.reachable_blocks.begin(), info.reachable_blocks.end(),
                  [](const Reachable& a, const Reachable& b) {
                      return a.pos < b.pos;
                  });
    }

#if REALM_ALLOC_DEBUG
    std::cout << "  Reachable: ";
    // this really should be inverted, showing all versions per entry instead of all entries per version
    for (auto& [version, info] : m_top_ref_map) {
        std::cout << std::endl << "    Version: " << version;
        for (auto& i : info.reachable_blocks) {
            std::cout << std::endl << "      " << i.pos << " - " << i.pos + i.size;
        }
    }
    std::cout << std::endl << "  Backdating:";
#endif
}
#endif

void GroupWriter::backdate()
{
    struct FreeList {
        Array positions;
        Array lengths;
        Array versions;
        ref_type top_ref;
        ref_type logical_file_size;
        uint64_t version;
        bool initialized = false;
        FreeList(Allocator& alloc, ref_type top, ref_type logical_file_size, uint64_t version)
            : positions(alloc)
            , lengths(alloc)
            , versions(alloc)
            , top_ref(top)
            , logical_file_size(logical_file_size)
            , version(version)
        {
        }
    };

    using FreeListMap = std::vector<std::unique_ptr<FreeList>>;
    FreeListMap old_freelists;
    old_freelists.reserve(m_top_ref_map.size());
    for (auto& [version, info] : m_top_ref_map) {
        if (version < m_oldest_reachable_version)
            continue;
        auto e = std::make_unique<FreeList>(m_alloc, info.top_ref, info.logical_file_size, version);
        old_freelists.push_back(std::move(e));
    }

    // little helper: get the youngest version older than the given one
    auto get_earlier = [&](uint64_t version) -> FreeList* {
        auto it = std::lower_bound(old_freelists.begin(), old_freelists.end(), version,
                                   [](const std::unique_ptr<FreeList>& e, uint64_t v) {
                                       return e->version < v;
                                   });
        // There will always be at least one freelist:
        REALM_ASSERT(it != old_freelists.end());
        REALM_ASSERT(it != old_freelists.begin());
        --it;
        REALM_ASSERT((*it)->version < version);
        return it->get();
    };

    // find (if possible) the youngest time stamp of any block in a sequence that fully covers a given one.
    auto find_cover_for = [&](const FreeSpaceEntry& entry, FreeList& free_list) -> std::optional<uint64_t> {
        auto entry_end = std::min(entry.ref + entry.size, free_list.logical_file_size);
        if (entry.ref >= entry_end) {
            return 0; // block completely beyond end of file
        }

        if (!free_list.initialized) {
            // setup arrays
            free_list.initialized = true;
            if (free_list.top_ref) {
                Array top_array(m_alloc);
                top_array.init_from_ref(free_list.top_ref);
                if (top_array.size() > Group::s_free_version_ndx) {
                    // we have a freelist with versioning info
                    free_list.positions.init_from_ref(top_array.get_as_ref(Group::s_free_pos_ndx));
                    free_list.lengths.init_from_ref(top_array.get_as_ref(Group::s_free_size_ndx));
                    free_list.versions.init_from_ref(top_array.get_as_ref(Group::s_free_version_ndx));
                }
            }
        }

        if (!free_list.positions.is_attached()) {
            return {}; // no free list associated with that version
        }
        const size_t limit = free_list.positions.size();
        if (limit == 0) {
            return {}; // empty freelist
        }
        const size_t index = free_list.positions.upper_bound_int(entry.ref) - 1;
        if (index == size_t(-1)) {
            return {}; // no free blocks before the 'ref' we are looking for
        }
        REALM_ASSERT(index < limit); // follows from above
        const auto start_pos = static_cast<ref_type>(free_list.positions.get(index));
        REALM_ASSERT(start_pos <= entry.ref);
        auto end_pos = start_pos + static_cast<ref_type>(free_list.lengths.get(index));
        if (end_pos <= entry.ref) {
            return {}; // free block ends before the 'ref' we are looking for
        }
        uint64_t found_version = free_list.versions.get(index);

        // coalesce any subsequent contiguous entries
        for (auto next = index + 1;
             next < limit && free_list.positions.get(next) == (int64_t)end_pos && end_pos < entry_end; ++next) {
            end_pos += static_cast<ref_type>(free_list.lengths.get(next));
            // pick youngest (highest) version of blocks
            found_version = std::max<uint64_t>(found_version, free_list.versions.get(next));
        }
        // is the block fully covered by the range established above?
        if (end_pos < entry_end) {
            return {}; // no, it isn't
        }
        REALM_ASSERT(found_version <= entry.released_at_version);
        return found_version;
    };

    // check if a given entry overlaps a reachable block. Only used in debug mode.
    auto is_referenced = [&](FreeSpaceEntry& entry) -> bool {
#ifdef REALM_DEBUG
        bool referenced = false;
        ALLOC_DBG_COUT("    Considering [" << entry.ref << ", " << entry.size << "]-" << entry.released_at_version
                                           << " {");
        auto end = m_top_ref_map.end();
        for (auto top_ref_map = m_top_ref_map.begin(); top_ref_map != end && !referenced; ++top_ref_map) {
            auto info_begin = top_ref_map->second.reachable_blocks.begin();
            auto info_end = top_ref_map->second.reachable_blocks.end();
            auto it = std::lower_bound(info_begin, info_end, entry.ref, [](const Reachable& a, size_t val) {
                return val > a.pos;
            });
            if (it != info_end) {
                if (it != info_begin)
                    --it;
                while (it != info_end && it->pos < entry.ref + entry.size) {
                    if (it->pos + it->size > entry.ref) {
                        ALLOC_DBG_COUT(top_ref_map->first << " ");
                        referenced = true;
                        break;
                    }
                    ++it;
                }
            }
        }
        if (!referenced) {
            ALLOC_DBG_COUT("none");
        }
        ALLOC_DBG_COUT("} ");
        return referenced;
#else
        static_cast<void>(entry); // silence a warning
        return false;
#endif
    };

    auto backdate_single_entry = [&](FreeSpaceEntry& entry) -> void {
        const auto referenced = is_referenced(entry);
        // early out if the reference is to the most recent version
        if (entry.released_at_version == m_current_version) {
            REALM_ASSERT_DEBUG(!referenced);
            return;
        }
        while (entry.released_at_version) {
            // early out for references before the oldest freelist:
            if (entry.released_at_version <= this->m_oldest_reachable_version) {
                REALM_ASSERT_DEBUG(!referenced);
                break;
            }
            auto earlier_it = get_earlier(entry.released_at_version);
            ALLOC_DBG_COUT(" - earlier freelist: " << earlier_it->version);
            if (auto covering_version = find_cover_for(entry, *earlier_it)) {
                ALLOC_DBG_COUT("  backdating [" << entry.ref << ", " << entry.size << "]  version: "
                                                << entry.released_at_version << " -> " << *covering_version);
                REALM_ASSERT_DEBUG(!referenced);
                entry.released_at_version = *covering_version;
            }
            else {
                ALLOC_DBG_COUT("  not free at that point");
                REALM_ASSERT_DEBUG(referenced);
                break;
            }
        }
        ALLOC_DBG_COUT(std::endl);
    };

#ifdef REALM_DEBUG
    map_reachable();
#endif
    for (auto&& entry : m_not_free_in_file) {
        backdate_single_entry(entry);
    }
}
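
// Backdating in a nutshell: if a block released at version v was already free in
// an earlier version whose freelist fully covers it, its released_at_version can
// be lowered to the youngest version found in the covering run of entries, so the
// block becomes reusable as soon as that older version is no longer reachable.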

void GroupWriter::prepare_evacuation()
{
    Array& top = m_group.m_top;
    if (top.size() > Group::s_evacuation_point_ndx) {
        if (auto val = top.get(Group::s_evacuation_point_ndx)) {
            Array arr(m_alloc);
            if (val & 1) {
                m_evacuation_limit = size_t(val >> 1);
                arr.create(Node::type_Normal);
                arr.add(uint64_t(m_evacuation_limit));
                arr.add(0); // Backoff = false
                top.set_as_ref(Group::s_evacuation_point_ndx, arr.get_ref());
            }
            else {
                arr.init_from_ref(to_ref(val));
                auto sz = arr.size();
                REALM_ASSERT(sz >= 2);
                m_evacuation_limit = size_t(arr.get(0));
                m_backoff = arr.get(1);
                if (m_backoff > 0) {
                    --m_backoff;
                }
                else {
                    for (size_t i = 2; i < sz; i++) {
                        m_evacuation_progress.push_back(size_t(arr.get(i)));
                    }
                }
                // We give up if the freelists were allocated above the evacuation limit
                if (m_evacuation_limit > 0 && m_free_positions.get_ref() > m_evacuation_limit) {
                    // Wait 10 commits until trying again
                    m_backoff = 10;
                    m_evacuation_limit = 0;
                    if (auto logger = m_group.get_logger()) {
                        logger->log(util::Logger::Level::detail, "Give up compaction");
                    }
                }
            }
        }
    }
}
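
// The evacuation-point slot in 'top' is a RefOrTagged value: an odd value is a
// tagged integer holding the evacuation limit (hence the 'val & 1' test and the
// 'val >> 1' above), while an even, non-zero value is a ref to an array holding
// {limit, backoff, progress...} as written by write_group().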

ref_type GroupWriter::write_group()
{
    ALLOC_DBG_COUT("Commit nr " << m_current_version << "   ( from " << m_oldest_reachable_version << " )"
                                << std::endl);

    read_in_freelist();
    // Now, 'm_size_map' holds all free elements candidate for recycling

    Array& top = m_group.m_top;
    ALLOC_DBG_COUT("  Allocating file space for data:" << std::endl);

    // Recursively write all changed arrays (but not 'top' and free-lists yet,
    // as they are going to change along the way.) If free space is available in
    // the attached database file, we use it, but this does not include space
    // that has been released during the current transaction (or since the last
    // commit), as that would lead to clobbering of the previous database
    // version.
    bool deep = true, only_if_modified = true;
    std::unique_ptr<InMemoryWriter> in_memory_writer;
    _impl::ArrayWriterBase* writer = this;
    if (m_alloc.is_in_memory()) {
        in_memory_writer = std::make_unique<InMemoryWriter>(*this);
        writer = in_memory_writer.get();
    }
    ref_type names_ref = m_group.m_table_names.write(*writer, deep, only_if_modified); // Throws
    ref_type tables_ref = m_group.m_tables.write(*writer, deep, only_if_modified);     // Throws

    int_fast64_t value_1 = from_ref(names_ref);
    int_fast64_t value_2 = from_ref(tables_ref);
    top.set(0, value_1); // Throws
    top.set(1, value_2); // Throws

    // If the file has a history and is opened in shared mode, write the new history
    // to the file. If the file has a history, but is not opened in shared mode,
    // discard the history, as it could otherwise be left in an inconsistent state.
    if (top.size() > Group::s_hist_ref_ndx) {
        if (ref_type history_ref = top.get_as_ref(Group::s_hist_ref_ndx)) {
            Allocator& alloc = top.get_alloc();
            ref_type new_history_ref = Array::write(history_ref, alloc, *writer, only_if_modified); // Throws
            top.set(Group::s_hist_ref_ndx, from_ref(new_history_ref));                              // Throws
        }
    }
    if (top.size() > Group::s_evacuation_point_ndx) {
        ref_type ref = top.get_as_ref(Group::s_evacuation_point_ndx);
        if (m_evacuation_limit || m_backoff) {
            REALM_ASSERT(ref);
            Array arr(m_alloc);
            arr.init_from_ref(ref);
            arr.truncate(2);

            arr.set(0, int64_t(m_evacuation_limit));
            if (m_backoff == 0 && m_evacuation_progress.empty()) {
                // We have done a scan - now we should just wait for the nodes still
                // in the evacuation zone to be released by the transactions
                // still holding on to them. This could take many commits.
                m_backoff = 1000;
            }
            arr.set(1, m_backoff); // Backoff from scanning
            for (auto index : m_evacuation_progress) {
                arr.add(int64_t(index));
            }
            ref = arr.write(*writer, false, only_if_modified);
            top.set_as_ref(Group::s_evacuation_point_ndx, ref);
        }
        else if (ref) {
            Array::destroy(ref, m_alloc);
            top.set(Group::s_evacuation_point_ndx, 0);
        }
    }

    ALLOC_DBG_COUT("  Freelist size after allocations: " << m_size_map.size() << std::endl);
    // We now back-date (if possible) any blocks freed in versions which
    // are becoming unreachable.
    if (m_any_new_unreachables)
        backdate();

    // We now have a bit of a chicken-and-egg problem. We need to write the
    // free-lists to the file, but the act of writing them will consume free
    // space, and thereby change the free-lists. To solve this problem, we
    // calculate an upper bound on the amount of space required for all of the
    // remaining arrays and allocate the space as one big chunk. This way we can
    // finalize the free-lists before writing them to the file.
    size_t max_free_list_size = m_size_map.size();

    // We need to add to the free-list any space that was freed during the
    // current transaction, but to avoid clobbering the previous version, we
    // cannot add it yet. Instead we simply account for the space
    // required. Since we will modify the free-lists themselves, we must ensure
    // that the original arrays used by the free-lists are counted as part of
    // the space that was freed during the current transaction. Note that a
    // copy-on-write on m_free_positions, for example, also implies a
    // copy-on-write on Group::m_top.
    ALLOC_DBG_COUT("  In-mem freelist before/after consolidation: " << m_group.m_alloc.m_free_read_only.size());
    size_t free_read_only_size = m_group.m_alloc.consolidate_free_read_only(); // Throws
    ALLOC_DBG_COUT("/" << free_read_only_size << std::endl);
    max_free_list_size += free_read_only_size;
    max_free_list_size += m_not_free_in_file.size();
    max_free_list_size += m_under_evacuation.size();
    // The final allocation of free space (i.e., the call to
    // reserve_free_space() below) may add extra entries to the free-lists.
    // We reserve room for the worst case scenario, which is as follows:
    // If the database has *max* theoretical fragmentation, it'll need one
    // entry in the free list for every 16 bytes, because both allocated and
    // free chunks are at least 8 bytes in size. For databases smaller than 2 GB
    // each free list entry requires 16 bytes (4 for the position, 4 for the
    // size and 8 for the version). The worst case scenario thus needs access
    // to a contiguous address range equal to the existing database size.
    // This growth requires at most 8 extension steps, each adding one entry
    // to the free list. The worst case occurs when you have to expand the
    // size to over 2 GB, where each entry suddenly requires 24 bytes. In this
    // case you will need 2 extra steps.
    // Another limit is due to the fact that an array holds less than 0x1000000
    // entries, so the total free list size will be less than 0x16000000. So for
    // bigger databases the space required for free lists will be relatively less.
    max_free_list_size += 10;

    size_t max_free_space_needed =
        Array::get_max_byte_size(top.size()) + size_per_free_list_entry() * max_free_list_size;

    ALLOC_DBG_COUT("  Allocating file space for freelists:" << std::endl);
    // Reserve space for remaining arrays. We ask for some extra bytes beyond the
    // maximum number that is required. This ensures that even if we end up
    // using the maximum size possible, we still do not end up with a zero size
    // free-space chunk as we deduct the actually used size from it.
    auto reserve = reserve_free_space(max_free_space_needed + 8); // Throws
    size_t reserve_pos = reserve->second;
    size_t reserve_size = reserve->first;

    // Now we can check if we can reduce the logical file size. This can be done
    // when there is only one block in m_under_evacuation, which means that all
    // nodes in this range have been moved.
    if (m_under_evacuation.size() == 1) {
        auto& elem = m_under_evacuation.back();
        if (elem.ref + elem.size == m_logical_size) {
            // This is at the end of the file
            size_t pos = elem.ref;
            m_logical_size = util::round_up_to_page_size(pos);
            elem.size = (m_logical_size - pos);
            if (elem.size == 0)
                m_under_evacuation.clear();
            top.set(Group::s_file_size_ndx, RefOrTagged::make_tagged(m_logical_size));
            auto ref = top.get_as_ref(Group::s_evacuation_point_ndx);
            REALM_ASSERT(ref);
            Array::destroy(ref, m_alloc);
            top.set(Group::s_evacuation_point_ndx, 0);
            m_evacuation_limit = 0;

            if (auto logger = m_group.get_logger()) {
                logger->log(util::Logger::Level::detail, "New logical size %1", m_logical_size);
            }
        }
    }

    // At this point we have allocated all the space we need, so we can add to
    // the free-lists any free space created during the current transaction (or
    // since last commit). Had we added it earlier, we would have risked
    // clobbering the previous database version. Note, however, that this risk
    // would only have been present in the non-transactional case where there is
    // no version tracking on the free-space chunks.

    // Now, let's update the realm-style freelists, which will later be written to file.
    // The function returns the index of the element holding the space reserved for the
    // free lists in the file.
    size_t reserve_ndx = recreate_freelist(reserve_pos);

    ALLOC_DBG_COUT("  Freelist size after merge: " << m_free_positions.size() << "   freelist space required: "
                                                   << max_free_space_needed << std::endl);
    // Before we calculate the actual sizes of the free-list arrays, we must
    // make sure that the final adjustments of the free lists (i.e., the
    // deduction of the actually used space from the reserved chunk) will not
    // change the byte-size of those arrays.
    // size_t reserve_pos = to_size_t(m_free_positions.get(reserve_ndx));
    REALM_ASSERT_3(reserve_size, >, max_free_space_needed);
    int_fast64_t value_4 = to_int64(reserve_pos + max_free_space_needed);

#if REALM_ENABLE_MEMDEBUG
    m_free_positions.m_no_relocation = true;
    m_free_lengths.m_no_relocation = true;
#endif

    // Ensure that this array does not reposition itself
    m_free_positions.ensure_minimum_width(value_4); // Throws

    // Get final sizes of free-list arrays
    size_t free_positions_size = m_free_positions.get_byte_size();
    size_t free_sizes_size = m_free_lengths.get_byte_size();
    size_t free_versions_size = m_free_versions.get_byte_size();
    REALM_ASSERT(Array::get_wtype_from_header(Array::get_header_from_data(m_free_versions.m_data)) ==
                 Array::wtype_Bits);

    // Calculate write positions
    ref_type reserve_ref = to_ref(reserve_pos);
    ref_type free_positions_ref = reserve_ref;
    ref_type free_sizes_ref = free_positions_ref + free_positions_size;
    ref_type free_versions_ref = free_sizes_ref + free_sizes_size;
    ref_type top_ref = free_versions_ref + free_versions_size;

    // Update top to point to the calculated positions
    top.set(Group::s_free_pos_ndx, from_ref(free_positions_ref));               // Throws
    top.set(Group::s_free_size_ndx, from_ref(free_sizes_ref));                  // Throws
    top.set(Group::s_free_version_ndx, from_ref(free_versions_ref));            // Throws
    top.set(Group::s_version_ndx, RefOrTagged::make_tagged(m_current_version)); // Throws

    // Compacting files smaller than 1 MB is not worth the effort. Arbitrarily chosen value.
    static constexpr size_t minimal_compaction_size = 0x100000;
    if (m_logical_size >= minimal_compaction_size && m_evacuation_limit == 0 && m_backoff == 0) {
        // We might have allocated a bigger chunk than needed for the free lists, so if we
        // add what we have reserved and subtract what was requested, we get a better measure
        // of what will be free eventually. Also subtract the locked space as this is not
        // actually free.
        size_t free_space = m_free_space_size + reserve_size - max_free_space_needed - m_locked_space_size;
        REALM_ASSERT(m_logical_size > free_space);
        size_t used_space = m_logical_size - free_space;
        if (free_space > 2 * used_space) {
            // Clean up potential
            auto limit = util::round_up_to_page_size(used_space + used_space / 2);

            // If we make the file too small, there is a big chance it will grow immediately afterwards
            static constexpr size_t minimal_evac_limit = 0x10000;
            m_evacuation_limit = std::max(minimal_evac_limit, limit);

            // From now on, we will only allocate below this limit.
            // Save the limit in the file.
            while (top.size() <= Group::s_evacuation_point_ndx) {
                top.add(0);
            }
            top.set(Group::s_evacuation_point_ndx, RefOrTagged::make_tagged(m_evacuation_limit));
            if (auto logger = m_group.get_logger()) {
                logger->log(util::Logger::Level::detail, "Start compaction with limit %1", m_evacuation_limit);
            }
        }
    }

    // Get final sizes
    size_t top_byte_size = top.get_byte_size();
    ref_type end_ref = top_ref + top_byte_size;
    REALM_ASSERT_3(size_t(end_ref), <=, reserve_pos + max_free_space_needed);

    // Deduct the used space from the reserved chunk. Note that we have made
    // sure that the remaining size is never zero. Also, by the call to
    // m_free_positions.ensure_minimum_width() above, we have made sure that
    // m_free_positions has the capacity to store the new larger value without
    // reallocation.
    size_t rest = reserve_pos + reserve_size - size_t(end_ref);
    size_t used = size_t(end_ref) - reserve_pos;
    REALM_ASSERT_3(rest, >, 0);
    int_fast64_t value_8 = from_ref(end_ref);
    int_fast64_t value_9 = to_int64(rest);

    // value_9 is guaranteed to be smaller than the existing entry in the array and hence will not cause bit
    // expansion
    REALM_ASSERT_3(value_8, <=, Array::ubound_for_width(m_free_positions.get_width()));
    REALM_ASSERT_3(value_9, <=, Array::ubound_for_width(m_free_lengths.get_width()));

    m_free_positions.set(reserve_ndx, value_8); // Throws
    m_free_lengths.set(reserve_ndx, value_9);   // Throws
    m_free_space_size += rest;

#if REALM_ALLOC_DEBUG
    std::cout << "  Final Freelist:" << std::endl;
    for (size_t j = 0; j < m_free_positions.size(); ++j) {
        std::cout << "    [" << m_free_positions.get(j) << ".." << m_free_lengths.get(j);
        if (m_free_versions.size()) {
            std::cout << "]: " << m_free_versions.get(j);
        }
    }
    std::cout << std::endl << std::endl;
#endif

    // The free-lists now have their final form, so we can write them to the file
    // char* start_addr = m_file_map.get_addr() + reserve_ref;
    if (m_alloc.is_in_memory()) {
        auto translator = in_memory_writer.get();
        write_array_at(translator, free_positions_ref, m_free_positions.get_header(), free_positions_size); // Throws
        write_array_at(translator, free_sizes_ref, m_free_lengths.get_header(), free_sizes_size);           // Throws
        write_array_at(translator, free_versions_ref, m_free_versions.get_header(), free_versions_size);    // Throws

        // Write top
        write_array_at(translator, top_ref, top.get_header(), top_byte_size); // Throws
    }
    else {
        MapWindow* window = m_window_mgr.get_window(reserve_ref, end_ref - reserve_ref);
        char* start_addr = window->translate(reserve_ref);
        window->encryption_read_barrier(start_addr, used);
        write_array_at(window, free_positions_ref, m_free_positions.get_header(), free_positions_size); // Throws
        write_array_at(window, free_sizes_ref, m_free_lengths.get_header(), free_sizes_size);           // Throws
        write_array_at(window, free_versions_ref, m_free_versions.get_header(), free_versions_size);    // Throws
        REALM_ASSERT_EX(
            free_positions_ref >= reserve_ref && free_positions_ref + free_positions_size <= reserve_ref + used,
            reserve_ref, reserve_ref + used, free_positions_ref, free_positions_ref + free_positions_size, top_ref);
        REALM_ASSERT_EX(free_sizes_ref >= reserve_ref && free_sizes_ref + free_sizes_size <= reserve_ref + used,
                        reserve_ref, reserve_ref + used, free_sizes_ref, free_sizes_ref + free_sizes_size, top_ref);
        REALM_ASSERT_EX(
            free_versions_ref >= reserve_ref && free_versions_ref + free_versions_size <= reserve_ref + used,
            reserve_ref, reserve_ref + used, free_versions_ref, free_versions_ref + free_versions_size, top_ref);

        // Write top
        write_array_at(window, top_ref, top.get_header(), top_byte_size); // Throws
        window->encryption_write_barrier(start_addr, used);
    }
    // Return top_ref so that it can be saved in the lock file used for coordination
    return top_ref;
}
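
// Commit layout: the reserved chunk is written as four contiguous arrays,
// [free_positions][free_lengths][free_versions][top], starting at reserve_ref;
// whatever remains of the chunk past end_ref is returned to the freelist via
// the reserve_ndx entry updated just above.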

void GroupWriter::read_in_freelist()
{
    std::vector<FreeSpaceEntry> free_in_file;
    size_t evacuation_limit = m_evacuation_limit ? m_evacuation_limit : size_t(-1);
    REALM_ASSERT(m_free_lengths.is_attached());
    size_t limit = m_free_lengths.size();
    REALM_ASSERT_RELEASE_EX(m_free_positions.size() == limit, limit, m_free_positions.size());
    REALM_ASSERT_RELEASE_EX(m_free_versions.size() == limit, limit, m_free_versions.size());

    if (limit) {
        auto limit_version = m_oldest_reachable_version;
        for (size_t idx = 0; idx < limit; ++idx) {
            size_t ref = size_t(m_free_positions.get(idx));
            size_t size = size_t(m_free_lengths.get(idx));

            uint64_t version = m_free_versions.get(idx);
            // Entries freed in later, still-alive versions are not candidates for merge or allocation
            if (version > limit_version) {
                m_not_free_in_file.emplace_back(ref, size, version);
                continue;
            }
            if (ref + size > evacuation_limit) {
                if (ref < evacuation_limit) {
                    // Split entry
                    size_t still_free_size = evacuation_limit - ref;
                    m_under_evacuation.emplace_back(evacuation_limit, size - still_free_size, 0);
                    size = still_free_size;
                }
                else {
                    m_under_evacuation.emplace_back(ref, size, 0);
                    continue;
                }
            }

            free_in_file.emplace_back(ref, size, 0);
        }

        // This will imply a copy-on-write
        m_free_positions.clear();
        m_free_lengths.clear();
        m_free_versions.clear();
    }
    else {
        // We need to free the space occupied by the free lists.
        // If the lists are empty, this has to be done explicitly,
        // as clear() would not copy-on-write an empty array.
        m_free_positions.copy_on_write();
        m_free_lengths.copy_on_write();
        m_free_versions.copy_on_write();
    }

#if REALM_ALLOC_DEBUG
    std::cout << "  Freelist (pinned): ";
    for (auto e : m_not_free_in_file) {
        std::cout << "[" << e.ref << ", " << e.size << "] <" << e.released_at_version << ">  ";
    }
    std::cout << std::endl;
#endif

    merge_adjacent_entries_in_freelist(m_under_evacuation);
    m_under_evacuation.erase(std::remove_if(m_under_evacuation.begin(), m_under_evacuation.end(),
                                            [](const auto& a) {
                                                return a.size == 0;
                                            }),
                             m_under_evacuation.end());
    merge_adjacent_entries_in_freelist(free_in_file);
    // The previous step may produce some entries with a size of zero. These
    // entries will be skipped in the next step.
    move_free_in_file_to_size_map(free_in_file, m_size_map);
}

size_t GroupWriter::recreate_freelist(size_t reserve_pos)
{
    std::vector<FreeSpaceEntry> free_in_file;
    auto& new_free_space = m_group.m_alloc.get_free_read_only(); // Throws
    auto nb_elements =
        m_size_map.size() + m_not_free_in_file.size() + m_under_evacuation.size() + new_free_space.size();
    free_in_file.reserve(nb_elements);

    size_t reserve_ndx = realm::npos;

    for (const auto& entry : m_size_map) {
        free_in_file.emplace_back(entry.second, entry.first, 0);
    }

    {
        size_t locked_space_size = 0;
        for (const auto& locked : m_not_free_in_file) {
            free_in_file.emplace_back(locked.ref, locked.size, locked.released_at_version);
            locked_space_size += locked.size;
        }

        for (const auto& free_space : new_free_space) {
            free_in_file.emplace_back(free_space.first, free_space.second, m_current_version);
            locked_space_size += free_space.second;
        }
        m_locked_space_size = locked_space_size;
    }

    for (const auto& elem : m_under_evacuation) {
        free_in_file.emplace_back(elem.ref, elem.size, 0);
    }

    REALM_ASSERT(free_in_file.size() == nb_elements);
    std::sort(begin(free_in_file), end(free_in_file), [](auto& a, auto& b) {
        return a.ref < b.ref;
    });

    {
        // Copy into arrays while checking consistency
        size_t prev_ref = 0;
        size_t prev_size = 0;
        size_t free_space_size = 0;
        auto limit = free_in_file.size();
        for (size_t i = 0; i < limit; ++i) {
            const auto& free_space = free_in_file[i];
            auto ref = free_space.ref;
            if (REALM_UNLIKELY(prev_ref + prev_size > ref)) {
                // Check if we are freeing arrays already in 'm_not_free_in_file'
                for (const auto& elem : new_free_space) {
                    ref_type free_ref = elem.first;
                    size_t free_sz = elem.second;
                    for (const auto& locked : m_not_free_in_file) {
                        REALM_ASSERT_RELEASE_EX(free_ref < locked.ref || free_ref >= (locked.ref + locked.size),
                                                locked.ref, locked.size, locked.released_at_version, free_ref,
                                                m_current_version, m_alloc.get_file_path_for_assertions());
                        REALM_ASSERT_RELEASE_EX(locked.ref < free_ref || locked.ref >= (free_ref + free_sz),
                                                locked.ref, locked.released_at_version, free_ref, free_sz,
                                                m_current_version, m_alloc.get_file_path_for_assertions());
                    }
                }

                REALM_ASSERT_RELEASE_EX(prev_ref + prev_size <= ref, prev_ref, prev_size, ref, i, limit,
                                        m_alloc.get_file_path_for_assertions());
            }
            if (reserve_pos == ref) {
                reserve_ndx = i;
            }
            else {
                // The reserved chunk should not be counted in now. We don't know how much of it
                // will eventually be used.
                free_space_size += free_space.size;
            }
            m_free_positions.add(free_space.ref);
            m_free_lengths.add(free_space.size);
            m_free_versions.add(free_space.released_at_version);
            prev_ref = free_space.ref;
            prev_size = free_space.size;
        }
        REALM_ASSERT_RELEASE(reserve_ndx != realm::npos);

        m_free_space_size = free_space_size;
    }

    return reserve_ndx;
}

1109
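// The freelist rebuilt above is sorted by ref, and the consistency check
// enforces prev_ref + prev_size <= ref, i.e. no two entries may overlap.
// The chunk at reserve_pos is deliberately excluded from m_free_space_size,
// since the amount of it that will actually be consumed is not yet known.
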
void GroupWriter::merge_adjacent_entries_in_freelist(std::vector<GroupWriter::FreeSpaceEntry>& list)
{
    if (list.size() > 1) {
        // Combine any adjacent chunks in the freelist
        auto prev = list.begin();
        auto end = list.end();
        for (auto it = list.begin() + 1; it != end; ++it) {
            REALM_ASSERT(it->ref > prev->ref);
            if (prev->ref + prev->size == it->ref) {
                prev->size += it->size;
                it->size = 0;
            }
            else {
                prev = it;
            }
        }
    }
}

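// Worked example of the merge above: given the sorted entries [ref=0, size=8],
// [ref=8, size=16] and [ref=32, size=8], the first entry grows to size 24, the
// second is left with size 0 (to be dropped by the caller), and the third,
// being non-adjacent, is untouched.
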
void GroupWriter::move_free_in_file_to_size_map(const std::vector<GroupWriter::FreeSpaceEntry>& list,
                                                std::multimap<size_t, size_t>& size_map)
{
    ALLOC_DBG_COUT("  Freelist (true free): ");
    for (auto& elem : list) {
        // Skip elements merged in 'merge_adjacent_entries_in_freelist'
        if (elem.size) {
            REALM_ASSERT_RELEASE_EX(!(elem.size & 7), elem.size);
            REALM_ASSERT_RELEASE_EX(!(elem.ref & 7), elem.ref);
            size_map.emplace(elem.size, elem.ref);
            ALLOC_DBG_COUT("[" << elem.ref << ", " << elem.size << "] ");
        }
    }
    ALLOC_DBG_COUT(std::endl);
}

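// A minimal sketch of the best-fit lookup that this size-keyed map enables
// (illustrative only, not part of the production code):
//
//     std::multimap<size_t, size_t> size_map; // chunk size -> ref
//     size_map.emplace(64, 1024);
//     size_map.emplace(128, 2048);
//     auto it = size_map.lower_bound(100); // finds the 128-byte chunk at 2048
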
size_t GroupWriter::get_free_space(size_t size)
{
    REALM_ASSERT_3(size % 8, ==, 0); // 8-byte alignment

    auto p = reserve_free_space(size);

    // Claim space from the identified chunk
    size_t chunk_pos = p->second;
    size_t chunk_size = p->first;
    REALM_ASSERT_3(chunk_size, >=, size);
    REALM_ASSERT_RELEASE_EX(!(chunk_pos & 7), chunk_pos);
    REALM_ASSERT_RELEASE_EX(!(chunk_size & 7), chunk_size);

    size_t rest = chunk_size - size;
    m_size_map.erase(p);
    if (rest > 0) {
        // Allocating part of a chunk - this always happens from the beginning
        // of the chunk. The call to reserve_free_space may split chunks
        // in order to make sure that it returns a chunk from which allocation
        // can be done from the beginning.
        m_size_map.emplace(rest, chunk_pos + size);
    }
    return chunk_pos;
}

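// Worked example of the allocation above: a request for 16 bytes served from a
// 64-byte chunk at position 1024 returns position 1024 and re-inserts the
// remaining 48 bytes into m_size_map as a new chunk at position 1040.
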
inline GroupWriter::FreeListElement GroupWriter::split_freelist_chunk(FreeListElement it, size_t alloc_pos)
{
    size_t start_pos = it->second;
    size_t chunk_size = it->first;
    m_size_map.erase(it);
    REALM_ASSERT_RELEASE_EX(alloc_pos > start_pos, alloc_pos, start_pos);

    REALM_ASSERT_RELEASE_EX(!(alloc_pos & 7), alloc_pos);
    size_t size_first = alloc_pos - start_pos;
    size_t size_second = chunk_size - size_first;
    m_size_map.emplace(size_first, start_pos);
    return m_size_map.emplace(size_second, alloc_pos);
}

GroupWriter::FreeListElement GroupWriter::search_free_space_in_free_list_element(FreeListElement it, size_t size)
{
    SlabAlloc& alloc = m_group.m_alloc;
    size_t chunk_size = it->first;

    // Search through the chunk, finding a place within it
    // where an allocation will not cross a mmap boundary
    size_t start_pos = it->second;
    size_t alloc_pos = alloc.find_section_in_range(start_pos, chunk_size, size);
    if (alloc_pos == 0) {
        return m_size_map.end();
    }
    // We found a place - if it's not at the beginning of the chunk,
    // we split the chunk so that the allocation can be done from the
    // beginning of the second chunk.
    if (alloc_pos != start_pos) {
        it = split_freelist_chunk(it, alloc_pos);
    }
    // Match found!
    ALLOC_DBG_COUT("    alloc [" << alloc_pos << ", " << size << "]" << std::endl);
    return it;
}

GroupWriter::FreeListElement GroupWriter::search_free_space_in_part_of_freelist(size_t size)
{
    auto it = m_size_map.lower_bound(size);
    while (it != m_size_map.end()) {
        // Accept either a perfect match or a block that is at least twice the
        // requested size. Tests have shown that this is a good strategy.
        if (it->first == size || it->first >= 2 * size) {
            auto ret = search_free_space_in_free_list_element(it, size);
            if (ret != m_size_map.end()) {
                return ret;
            }
            ++it;
        }
        else {
            // If the block was too small, search for the first that is at least twice as big.
            it = m_size_map.lower_bound(2 * size);
        }
    }
    // No match
    return m_size_map.end();
}

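// Example of the strategy above: for a 64-byte request, a 64-byte block is a
// perfect match and any block of 128 bytes or more is acceptable, but blocks
// of 72-120 bytes are skipped by resuming the search at lower_bound(128).
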
GroupWriter::FreeListElement GroupWriter::reserve_free_space(size_t size)
{
    auto chunk = search_free_space_in_part_of_freelist(size);
    while (chunk == m_size_map.end()) {
        if (!m_under_evacuation.empty()) {
            // We have been too aggressive in setting the evacuation limit.
            // Just give up - but first release all held-back elements.
            for (auto& elem : m_under_evacuation) {
                m_size_map.emplace(elem.size, elem.ref);
            }
            m_under_evacuation.clear();
            m_evacuation_limit = 0;
            m_backoff = 10;
            if (auto logger = m_group.get_logger()) {
                logger->log(util::Logger::Level::detail, "Give up compaction");
            }
            chunk = search_free_space_in_part_of_freelist(size);
        }
        else {
            // No free space, so we have to extend the file.
            auto new_chunk = extend_free_space(size);
            chunk = search_free_space_in_free_list_element(new_chunk, size);
        }
    }
    return chunk;
}

// Extend the free space with at least the requested size.
// Due to mmap constraints, the extension cannot be guaranteed to
// allow an allocation of the requested size, so multiple calls to
// extend_free_space may be needed before an allocation can succeed.
GroupWriter::FreeListElement GroupWriter::extend_free_space(size_t requested_size)
{
    // We need to consider the "logical" size of the file here, and not the real
    // size. The real size may have changed without the free space information
    // having been adjusted accordingly. This can happen, for example, if
    // write_group() fails before writing the new top-ref, but after having
    // extended the file size. It can also happen as part of initial file expansion
    // during attach_file().
    size_t logical_file_size = to_size_t(m_group.m_top.get(2) / 2);
    // Find the minimal new size according to the following growth strategy:
    // at least 100% (doubling) until we reach 1 MB, then grow by 1 MB at a time.
    uint64_t minimal_new_size = logical_file_size;
    constexpr uint64_t growth_boundary = 1024 * 1024; // 1 MB
    if (minimal_new_size < growth_boundary) {
        minimal_new_size *= 2;
    }
    else {
        minimal_new_size += growth_boundary;
    }
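    // Worked example: a 256 KB file doubles to 512 KB, whereas a 4 MB file
    // grows to 5 MB (the requested size below may push either figure higher).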
    // Grow with at least the growth ratio, but if more is required, grow more.
    uint64_t required_new_size = logical_file_size + requested_size;
    if (required_new_size > minimal_new_size) {
        minimal_new_size = required_new_size;
    }
    // Ensure that minimal_new_size is less than 3 GB on a 32-bit device
    if (minimal_new_size > (std::numeric_limits<size_t>::max() / 4 * 3)) {
        throw MaximumFileSizeExceeded("GroupWriter cannot extend free space: " + util::to_string(logical_file_size) +
                                      " + " + util::to_string(requested_size));
    }

    // We now know that it is safe to assign the size to a size_t,
    // and that the following adjustments are safe to perform.
    size_t new_file_size = static_cast<size_t>(minimal_new_size);

    // Align to page size, but do not cross a section boundary
    size_t next_boundary = m_alloc.align_size_to_section_boundary(new_file_size);
    new_file_size = util::round_up_to_page_size(new_file_size);
    if (new_file_size > next_boundary) {
        // We cannot cross a section boundary. In this case the allocation will
        // likely fail, then retry, and we'll allocate anew from the next section.
        new_file_size = next_boundary;
    }
    // The size must be a multiple of 8. This is guaranteed as long as
    // the initial size is a multiple of 8.
    REALM_ASSERT_RELEASE_EX(!(new_file_size & 7), new_file_size);
    REALM_ASSERT_3(logical_file_size, <, new_file_size);

    // Note: resize_file() will call File::prealloc() which may misbehave under
    // race conditions (see documentation of File::prealloc()). Fortunately, no
    // race conditions can occur, because in transactional mode we hold a write
    // lock at this time, and in non-transactional mode it is the responsibility
    // of the user to ensure non-concurrent file mutation.
    m_alloc.resize_file(new_file_size); // Throws
    REALM_ASSERT(new_file_size <= get_file_size());
    ALLOC_DBG_COUT("        ** File extension to " << new_file_size << "     after request for " << requested_size
                                                   << std::endl);

    // Since new_file_size is larger than logical_file_size, and known to
    // be representable in a size_t, so is the difference:
    size_t chunk_size = new_file_size - logical_file_size;
    REALM_ASSERT_RELEASE_EX(!(chunk_size & 7), chunk_size);
    REALM_ASSERT_RELEASE(chunk_size != 0);
    auto it = m_size_map.emplace(chunk_size, logical_file_size);

    // Update the logical file size
    m_logical_size = new_file_size;
    m_group.m_top.set(Group::s_file_size_ndx, RefOrTagged::make_tagged(m_logical_size));

    // std::cout << "New file size = " << std::hex << m_logical_size << std::dec << std::endl;

    return it;
}

bool inline is_aligned(char* addr)
{
    size_t as_binary = reinterpret_cast<size_t>(addr);
    return (as_binary & 7) == 0;
}

ref_type GroupWriter::write_array(const char* data, size_t size, uint32_t checksum)
{
    // Get the position of free space to write in (expanding the file if needed)
    size_t pos = get_free_space(size);

    // Write the block
    MapWindow* window = m_window_mgr.get_window(pos, size);
    char* dest_addr = window->translate(pos);
    REALM_ASSERT_RELEASE(is_aligned(dest_addr));
    window->encryption_read_barrier(dest_addr, size);
    memcpy(dest_addr, &checksum, 4);
    memcpy(dest_addr + 4, data + 4, size - 4);
    window->encryption_write_barrier(dest_addr, size);
    // Return the ref of the written array
    ref_type ref = to_ref(pos);
    return ref;
}

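// Note: only the first four bytes of the block written above are taken from
// `checksum`; the payload is copied verbatim starting at offset 4.
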
template <class T>
void GroupWriter::write_array_at(T* translator, ref_type ref, const char* data, size_t size)
{
    size_t pos = size_t(ref);

    REALM_ASSERT_3(pos + size, <=, to_size_t(m_group.m_top.get(2) / 2));
    // REALM_ASSERT_3(pos + size, <=, m_file_map.get_size());
    char* dest_addr = translator->translate(pos);
    REALM_ASSERT_RELEASE(is_aligned(dest_addr));

    uint32_t dummy_checksum = 0x41414141UL; // "AAAA" in ASCII
    memcpy(dest_addr, &dummy_checksum, 4);
    memcpy(dest_addr + 4, data + 4, size - 4);
}

void GroupCommitter::commit(ref_type new_top_ref)
{
    using _impl::SimulatedFailure;
    SimulatedFailure::trigger(SimulatedFailure::group_writer__commit); // Throws

    MapWindow* window = m_window_mgr.get_window(0, sizeof(SlabAlloc::Header));
    SlabAlloc::Header& file_header = *reinterpret_cast<SlabAlloc::Header*>(window->translate(0));
    window->encryption_read_barrier(&file_header, sizeof file_header);

    // One bit of the flags field selects which of the two top ref slots is in
    // use (same for the file format version slots). The current value of the bit
    // reflects the currently bound snapshot, so we need to invert it for the
    // new snapshot. Other bits must remain unchanged.
    unsigned old_flags = file_header.m_flags;
    unsigned new_flags = old_flags ^ SlabAlloc::flags_SelectBit;
    int slot_selector = ((new_flags & SlabAlloc::flags_SelectBit) != 0 ? 1 : 0);

    // Update top ref and file format version
    int file_format_version = m_group.get_file_format_version();
    using type_1 = std::remove_reference<decltype(file_header.m_file_format[0])>::type;
    REALM_ASSERT(!util::int_cast_has_overflow<type_1>(file_format_version));
    // Only write the file format field if necessary (optimization)
    if (type_1(file_format_version) != file_header.m_file_format[slot_selector]) {
        // The write barrier on the entire `file_header` happens below
        file_header.m_file_format[slot_selector] = type_1(file_format_version);
    }

    // When running the test suite, device synchronization is disabled
    bool disable_sync = get_disable_sync_to_disk() || m_durability == Durability::Unsafe;
    file_header.m_top_ref[slot_selector] = new_top_ref;

    // Make sure that all data relating to the new snapshot is written to
    // stable storage before flipping the slot selector
    window->encryption_write_barrier(&file_header, sizeof file_header);
    m_window_mgr.flush_all_mappings();
    if (!disable_sync) {
        m_window_mgr.sync_all_mappings();
        m_alloc.get_file().barrier();
    }

    // Flip the slot selector bit.
    window->encryption_read_barrier(&file_header, sizeof file_header);
    using type_2 = std::remove_reference<decltype(file_header.m_flags)>::type;
    file_header.m_flags = type_2(new_flags);

    // Write the new selector to disk
    window->encryption_write_barrier(&file_header.m_flags, sizeof(file_header.m_flags));
    window->flush();
    if (!disable_sync) {
        window->sync();
        m_alloc.get_file().barrier();
    }
}

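// The commit above is a two-phase update: all data belonging to the new
// snapshot, including the new top ref in the currently unused slot, is flushed
// (and, unless sync is disabled, synced with a barrier) before the single-bit
// slot-selector flip is written and synced. A crash before the flip therefore
// leaves the previous snapshot fully intact.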

#ifdef REALM_DEBUG

void GroupWriter::dump()
{
    size_t count = m_free_lengths.size();
    std::cout << "count: " << count << ", m_size = " << m_alloc.get_file_size() << ", "
              << "version >= " << m_oldest_reachable_version << "\n";
    for (size_t i = 0; i < count; ++i) {
        std::cout << i << ": " << m_free_positions.get(i) << ", " << m_free_lengths.get(i) << " - "
                  << m_free_versions.get(i) << "\n";
    }
}

#endif