• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / jorgen.edelbo_334

01 Jul 2024 07:22AM UTC coverage: 90.829% (-0.04%) from 90.865%
jorgen.edelbo_334

Pull #7803

Evergreen

jedelbo
Merge branch 'next-major' into feature/string-compression
Pull Request #7803: Feature/string compression

102912 of 180568 branches covered (56.99%)

1141 of 1267 new or added lines in 33 files covered. (90.06%)

172 existing lines in 24 files now uncovered.

218291 of 240332 relevant lines covered (90.83%)

7818396.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.27
/src/realm/array_string_short.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <cstdlib>
20
#include <algorithm>
21
#include <cstring>
22

23
#ifdef REALM_DEBUG
24
#include <cstdio>
25
#include <iostream>
26
#include <iomanip>
27
#endif
28

29
#include <realm/utilities.hpp>
30
#include <realm/array_string_short.hpp>
31
#include <realm/impl/destroy_guard.hpp>
32
#include <realm/column_integer.hpp>
33

34
using namespace realm;
35

36

37
namespace {
38

39
// Rounds `size` up to the nearest supported block width. The supported widths are
// 0, 1, 2 and then every power of two up to 256. Width 1 is kept so that empty
// strings take as little space as possible, because width 0 can only store nulls.
size_t round_up(size_t size)
{
    REALM_ASSERT(size <= 256);

    // 0, 1 and 2 are already valid widths.
    if (size <= 2) {
        return size;
    }

    // Smear the highest set bit of (size - 1) into all lower positions; adding one
    // afterwards yields the smallest power of two that is >= size. Shifts of 1, 2
    // and 4 cover eight bits, which is enough for the asserted range.
    size_t smeared = size - 1;
    for (unsigned shift = 1; shift <= 4; shift <<= 1) {
        smeared |= smeared >> shift;
    }
    return smeared + 1;
}
610,215✔
55

56
} // anonymous namespace
57

58
bool ArrayStringShort::is_null(size_t ndx) const
59
{
595,905✔
60
    REALM_ASSERT_3(ndx, <, m_size);
595,905✔
61
    StringData sd = get(ndx);
595,905✔
62
    return sd.is_null();
595,905✔
63
}
595,905✔
64

65
void ArrayStringShort::set_null(size_t ndx)
66
{
18✔
67
    REALM_ASSERT_3(ndx, <, m_size);
18✔
68
    StringData sd = realm::null();
18✔
69
    set(ndx, sd);
18✔
70
}
18✔
71

72
void ArrayStringShort::set(size_t ndx, StringData value)
73
{
2,483,334✔
74
    REALM_ASSERT_3(ndx, <, m_size);
2,483,334✔
75
    REALM_ASSERT_3(value.size(), <, max_width); // otherwise we have to use another column type
2,483,334✔
76

77
    // if m_width == 0 and m_nullable == true, then entire array contains only null entries
78
    // if m_width == 0 and m_nullable == false, then entire array contains only "" entries
79
    if ((m_nullable ? value.is_null() : value.size() == 0) && m_width == 0) {
2,483,334✔
80
        return; // existing element in array already equals the value we want to set it to
51,636✔
81
    }
51,636✔
82

83
    // Make room for the new value plus a zero-termination
84
    if (m_width <= value.size()) {
2,431,698✔
85
        // Calc min column width
86
        size_t new_width = ::round_up(value.size() + 1);
610,215✔
87
        const size_t old_width = m_width;
610,215✔
88
        alloc(m_size, new_width); // Throws
610,215✔
89

90
        char* base = m_data;
610,215✔
91
        char* new_end = base + m_size * new_width;
610,215✔
92

93
        // Expand the old values in reverse order
94
        if (old_width > 0) {
610,215✔
95
            const char* old_end = base + m_size * old_width;
171,063✔
96
            while (new_end != base) {
679,698✔
97
                *--new_end = char(*--old_end + (new_width - old_width));
508,635✔
98
                {
508,635✔
99
                    // extend 0-padding
100
                    char* new_begin = new_end - (new_width - old_width);
508,635✔
101
                    std::fill(new_begin, new_end, 0);
508,635✔
102
                    new_end = new_begin;
508,635✔
103
                }
508,635✔
104
                {
508,635✔
105
                    // copy string payload
106
                    const char* old_begin = old_end - (old_width - 1);
508,635✔
107
                    if (static_cast<size_t>(old_end - old_begin) < old_width) // non-null string
508,635✔
108
                        new_end = std::copy_backward(old_begin, old_end, new_end);
508,635✔
109
                    old_end = old_begin;
508,635✔
110
                }
508,635✔
111
            }
508,635✔
112
        }
171,063✔
113
        else {
439,152✔
114
            // old_width == 0. Expand to new width.
115
            while (new_end != base) {
908,943✔
116
                REALM_ASSERT_3(new_width, <=, max_width);
469,791✔
117
                *--new_end = static_cast<char>(new_width);
469,791✔
118
                {
469,791✔
119
                    char* new_begin = new_end - (new_width - 1);
469,791✔
120
                    std::fill(new_begin, new_end, 0); // Fill with zero bytes
469,791✔
121
                    new_end = new_begin;
469,791✔
122
                }
469,791✔
123
            }
469,791✔
124
        }
439,152✔
125
    }
610,215✔
126
    else if (is_read_only()) {
1,821,483✔
127
        if (get(ndx) == value)
1,137✔
UNCOV
128
            return;
×
129
        copy_on_write();
1,137✔
130
    }
1,137✔
131

132
    REALM_ASSERT_3(0, <, m_width);
2,431,698✔
133

134
    // Set the value
135
    char* begin = m_data + (ndx * m_width);
2,431,698✔
136
    char* end = begin + (m_width - 1);
2,431,698✔
137
    begin = realm::safe_copy_n(value.data(), value.size(), begin);
2,431,698✔
138
    std::fill(begin, end, 0); // Pad with zero bytes
2,431,698✔
139
    static_assert(max_width <= max_width, "Padding size must fit in 7-bits");
2,431,698✔
140

141
    if (value.is_null()) {
2,431,698✔
142
        REALM_ASSERT_3(m_width, <=, 128);
47,808✔
143
        *end = static_cast<char>(m_width);
47,808✔
144
    }
47,808✔
145
    else {
2,383,890✔
146
        int pad_size = int(end - begin);
2,383,890✔
147
        *end = char(pad_size);
2,383,890✔
148
    }
2,383,890✔
149
}
2,431,698✔
150

151

152
void ArrayStringShort::insert(size_t ndx, StringData value)
153
{
1,998,756✔
154
    REALM_ASSERT_3(ndx, <=, m_size);
1,998,756✔
155
    REALM_ASSERT(value.size() < max_width); // otherwise we have to use another column type
1,998,756✔
156

157
    // FIXME: this performs up to 2 memcpy() operations. This could be improved
158
    // by making the allocator make a gap for the new value for us, but it's a
159
    // bit complex.
160

161
    // Allocate room for the new value
162
    const auto old_size = m_size;
1,998,756✔
163
    alloc(m_size + 1, m_width); // Throws
1,998,756✔
164

165
    // Make gap for new value
166
    memmove(m_data + m_width * (ndx + 1), m_data + m_width * ndx, m_width * (old_size - ndx));
1,998,756✔
167

168
    // Set new value
169
    set(ndx, value);
1,998,756✔
170
    return;
1,998,756✔
171
}
1,998,756✔
172

173
void ArrayStringShort::erase(size_t ndx)
174
{
29,169✔
175
    REALM_ASSERT_3(ndx, <, m_size);
29,169✔
176

177
    // Check if we need to copy before modifying
178
    copy_on_write(); // Throws
29,169✔
179

180
    // move data backwards after deletion
181
    if (ndx < m_size - 1) {
29,169✔
182
        char* new_begin = m_data + ndx * m_width;
17,115✔
183
        char* old_begin = new_begin + m_width;
17,115✔
184
        char* old_end = m_data + m_size * m_width;
17,115✔
185
        realm::safe_copy_n(old_begin, old_end - old_begin, new_begin);
17,115✔
186
    }
17,115✔
187

188
    --m_size;
29,169✔
189

190
    // Update size in header
191
    set_header_size(m_size);
29,169✔
192
}
29,169✔
193

194
size_t ArrayStringShort::calc_byte_len(size_t num_items, size_t width) const
195
{
2,608,569✔
196
    return header_size + (num_items * width);
2,608,569✔
197
}
2,608,569✔
198

199
size_t ArrayStringShort::calc_item_count(size_t bytes, size_t width) const noexcept
200
{
×
201
    if (width == 0)
×
202
        return size_t(-1); // zero-width gives infinite space
×
203

204
    size_t bytes_without_header = bytes - header_size;
×
205
    return bytes_without_header / width;
×
206
}
×
207

208
// Counts the elements equal to `value` in the range [begin, end) by calling
// find_first() repeatedly, each time resuming just past the previous match.
size_t ArrayStringShort::count(StringData value, size_t begin, size_t end) const noexcept
{
    size_t hits = 0;
    size_t pos = find_first(value, begin, end);
    while (pos != not_found) {
        ++hits;
        pos = find_first(value, pos + 1, end);
    }
    return hits;
}
6✔
223

224
// Returns the index of the first element in [begin, end) that equals `value`, or
// not_found (also spelled npos / size_t(-1) on some paths) when there is none.
// Passing size_t(-1) as `end` means "to the end of the array".
size_t ArrayStringShort::find_first(StringData value, size_t begin, size_t end) const noexcept
{
    if (end == size_t(-1))
        end = m_size;
    REALM_ASSERT(begin <= m_size && end <= m_size && begin <= end);

    if (m_width == 0) {
        // m_width == 0 implies that all elements are identical: null when the array is
        // nullable, otherwise "". The first element in range matches or nothing does.
        const bool value_matches = m_nullable ? value.is_null() : value.size() == 0;
        return value_matches && begin < m_size ? begin : npos;
    }

    const size_t needle_size = value.size();
    // A string can never be wider than the column width
    if (m_width <= needle_size)
        return size_t(-1);

    // Searching for null (nullable array) or for "" (non-nullable array).
    const bool want_default = m_nullable ? value.is_null() : needle_size == 0;
    if (want_default) {
        for (size_t i = begin; i != end; ++i) {
            if (m_nullable ? is_null(i) : get(i).size() == 0)
                return i;
        }
        return not_found;
    }

    // Searching for a non-null empty string: only the last byte of each element is
    // inspected.
    if (needle_size == 0) {
        const char* last_bytes = m_data + (m_width - 1);
        for (size_t i = begin; i != end; ++i) {
            size_t stored_size = (m_width - 1) - last_bytes[i * m_width];
            // A stored size of zero is an empty string. A last byte equal to m_width
            // (what set() writes for null) does not produce zero here.
            if (REALM_UNLIKELY(stored_size == 0))
                return i;
        }
        return not_found;
    }

    // General case: compare the leading bytes first, then confirm that the stored
    // length is exactly the length searched for.
    for (size_t i = begin; i != end; ++i) {
        const char* elem = m_data + (i * m_width);
        if (memcmp(elem, value.data(), needle_size) != 0)
            continue;
        size_t stored_size = (m_width - 1) - elem[m_width - 1];
        if (stored_size == needle_size)
            return i;
    }

    return not_found;
}
19,554,135✔
272

273
// Appends to `result` the index of every element in [begin, end) that equals
// `value`, each offset by `add_offset`. Matches are located with find_first().
void ArrayStringShort::find_all(IntegerColumn& result, StringData value, size_t add_offset, size_t begin, size_t end)
{
    size_t hit = find_first(value, begin, end);
    while (hit != not_found) {
        result.add(add_offset + hit); // Throws
        hit = find_first(value, hit + 1, end);
    }
}
6✔
284

285
bool ArrayStringShort::compare_string(const ArrayStringShort& c) const noexcept
286
{
24✔
287
    if (c.size() != size())
24✔
288
        return false;
6✔
289

290
    for (size_t i = 0; i < size(); ++i) {
24✔
291
        if (get(i) != c.get(i))
12✔
292
            return false;
6✔
293
    }
12✔
294

295
    return true;
12✔
296
}
18✔
297

298
#ifdef REALM_DEBUG // LCOV_EXCL_START ignore debug functions
299

300
void ArrayStringShort::string_stats() const
301
{
×
302
    size_t total = 0;
×
303
    size_t longest = 0;
×
304

305
    for (size_t i = 0; i < m_size; ++i) {
×
306
        StringData str = get(i);
×
307
        size_t str_size = str.size() + 1;
×
308
        total += str_size;
×
309
        if (str_size > longest)
×
310
            longest = str_size;
×
311
    }
×
312

313
    size_t array_size = m_size * m_width;
×
314
    size_t zeroes = array_size - total;
×
315
    size_t zavg = zeroes / (m_size ? m_size : 1); // avoid possible div by zero
×
316

317
    std::cout << "Size: " << m_size << "\n";
×
318
    std::cout << "Width: " << m_width << "\n";
×
319
    std::cout << "Total: " << array_size << "\n";
×
320
    // std::cout << "Capacity: " << m_capacity << "\n\n";
321
    std::cout << "Bytes string: " << total << "\n";
×
322
    std::cout << "     longest: " << longest << "\n";
×
323
    std::cout << "Bytes zeroes: " << zeroes << "\n";
×
324
    std::cout << "         avg: " << zavg << "\n";
×
325
}
×
326

327
#endif // LCOV_EXCL_STOP ignore debug functions
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc