• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / nicola.cabiddu_1042

27 Sep 2023 06:04PM CUT coverage: 91.085% (-1.8%) from 92.915%
nicola.cabiddu_1042

Pull #6766

Evergreen

nicola-cab
Fix logic for dictionaries
Pull Request #6766: Client Reset for collections in mixed / nested collections

97276 of 178892 branches covered (0.0%)

1994 of 2029 new or added lines in 7 files covered. (98.28%)

4556 existing lines in 112 files now uncovered.

237059 of 260260 relevant lines covered (91.09%)

6321099.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.24
/src/realm/unicode.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <realm/unicode.hpp>
20

21
#include <algorithm>
22
#include <clocale>
23
#include <vector>
24

25
#ifdef _WIN32
26
#ifndef NOMINMAX
27
#define NOMINMAX
28
#endif
29
#include <windows.h>
30
#else
31
#include <ctype.h>
32
#endif
33

34
namespace realm {
35

36
// Highest character currently supported for *sorting* strings in Realm, when using STRING_COMPARE_CPP11.
// 591 == 0x24F. utf8_compare() sizes its collation table as (this value + 1) entries and falls back to
// comparing raw code point values as soon as either character is above this limit.
constexpr size_t last_latin_extended_2_unicode = 591;
38

39
// clang-format off
40
// Returns the number of bytes in a UTF-8 sequence whose leading byte is as specified.
//
// Bytes that cannot start a sequence (continuation bytes 0x80..0xBF as well as 0xFE and 0xFF) are
// reported as length 1, so the result is never zero. The 5- and 6-byte lead bytes (0xF8..0xFD) are
// reported with their nominal lengths; no validation is done here.
size_t sequence_length(char lead)
{
    // Keep 'static', else the entire array would be pushed to the stack at each call.
    static constexpr unsigned char lengths[256] = {
        // 0x00..0x7F: single byte (ASCII)
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        // 0x80..0xBF: continuation bytes, treated as length 1
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        // 0xC0..0xDF: two bytes
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        // 0xE0..0xEF: three bytes
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        // 0xF0..0xF7: four, 0xF8..0xFB: five, 0xFC..0xFD: six, 0xFE..0xFF: one
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
    };

    return lengths[static_cast<unsigned char>(lead)];
}
53
// clang-format on
54

55
// Check if the next UTF-8 sequence in [begin, end) is identical to
// the one beginning at begin2. If it is, 'begin' is advanced
// accordingly; on a mismatch 'begin' is left untouched.
//
// The range [begin, end) must not be empty: the first byte is read
// unconditionally. 'begin2' must be readable for as many bytes as the
// sequence at 'begin' turns out to span.
bool equal_sequence(const char*& begin, const char* end, const char* begin2)
{
    if (begin[0] != begin2[0])
        return false;

    size_t i = 1;
    if (static_cast<unsigned char>(begin[0]) & 0x80) {
        // Non-ASCII lead byte: all following bytes matching '10xxxxxx'
        // will be considered as part of this character.
        while (begin + i != end) {
            const bool is_continuation = (static_cast<unsigned char>(begin[i]) & 0xC0) == 0x80;
            if (!is_continuation)
                break;
            if (begin[i] != begin2[i])
                return false;
            ++i;
        }
    }

    begin += i;
    return true;
}
79

80
// Translate from utf8 char to unicode. No check for invalid utf8; may read out of bounds! Caller must check.
81
uint32_t utf8value(const char* character)
82
{
380,266,626✔
83
    const unsigned char* c = reinterpret_cast<const unsigned char*>(character);
380,266,626✔
84
    size_t len = sequence_length(c[0]);
380,266,626✔
85
    uint32_t res = c[0];
380,266,626✔
86

189,894,411✔
87
    if (len == 1)
380,266,626✔
88
        return res;
376,825,530✔
89

1,718,790✔
90
    res &= (0x3f >> (len - 1));
3,441,096✔
91

1,718,790✔
92
    for (size_t i = 1; i < len; i++)
9,728,379✔
93
        res = ((res << 6) | (c[i] & 0x3f));
6,287,283✔
94

1,718,790✔
95
    return res;
3,441,096✔
96
}
3,441,096✔
97

98
// Returns bool(string1 < string2) for utf-8
99
bool utf8_compare(StringData string1, StringData string2)
100
{
1,476,591✔
101
    const char* s1 = string1.data();
1,476,591✔
102
    const char* s2 = string2.data();
1,476,591✔
103

744,447✔
104
    // This collation_order array has 592 entries; one entry per unicode character in the range 0...591
744,447✔
105
    // (upto and including 'Latin Extended 2'). The value tells what 'sorting order rank' the character
744,447✔
106
    // has, such that unichar1 < unichar2 implies collation_order[unichar1] < collation_order[unichar2]. The
744,447✔
107
    // array is generated from the table found at ftp://ftp.unicode.org/Public/UCA/latest/allkeys.txt. At the
744,447✔
108
    // bottom of unicode.cpp you can find source code that reads such a file and translates it into C++ that
744,447✔
109
    // you can copy/paste in case the official table should get updated.
744,447✔
110
    //
744,447✔
111
    // NOTE: Some numbers in the array are vere large. This is because the value is the *global* rank of the
744,447✔
112
    // almost full unicode set. An optimization could be to 'normalize' all values so they ranged from
744,447✔
113
    // 0...591 so they would fit in a uint16_t array instead of uint32_t.
744,447✔
114
    //
744,447✔
115
    // It groups all characters that look visually identical, that is, it puts `a, ‡, Â` together and before
744,447✔
116
    // `¯, o, ˆ`. Note that this sorting method is wrong in some countries, such as Denmark where `Â` must
744,447✔
117
    // come last. NOTE: This is a limitation of STRING_COMPARE_CORE until we get better such 'locale' support.
744,447✔
118

744,447✔
119
    // clang-format off
744,447✔
120
    static const uint32_t collation_order_core[last_latin_extended_2_unicode + 1] = {
1,476,591✔
121
        0, 2, 3, 4, 5, 6, 7, 8, 9, 33, 34, 35, 36, 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 31, 38, 39, 40, 41, 42, 43, 29, 44, 45, 46, 76, 47, 30, 48, 49, 128, 132, 134, 137, 139, 140, 143, 144, 145, 146, 50, 51, 77, 78, 79, 52, 53, 148, 182, 191, 208, 229, 263, 267, 285, 295, 325, 333, 341, 360, 363, 385, 429, 433, 439, 454, 473, 491, 527, 531, 537, 539, 557, 54, 55, 56, 57, 58, 59, 147, 181, 190, 207,
1,476,591✔
122
        228, 262, 266, 284, 294, 324, 332, 340, 359, 362, 384, 428, 432, 438, 453, 472, 490, 526, 530, 536, 538, 556, 60, 61, 62, 63, 28, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 32, 64, 72, 73, 74, 75, 65, 88, 66, 89, 149, 81, 90, 1, 91, 67, 92, 80, 136, 138, 68, 93, 94, 95, 69, 133, 386, 82, 129, 130, 131, 70, 153, 151, 157, 165, 575, 588, 570, 201, 233,
1,476,591✔
123
        231, 237, 239, 300, 298, 303, 305, 217, 371, 390, 388, 394, 402, 584, 83, 582, 495, 493, 497, 555, 541, 487, 470, 152, 150, 156, 164, 574, 587, 569, 200, 232, 230, 236, 238, 299, 297, 302, 304, 216, 370, 389, 387, 393, 401, 583, 84, 581, 494, 492, 496, 554, 540, 486, 544, 163, 162, 161, 160, 167, 166, 193, 192, 197, 196, 195, 194, 199, 198, 210, 209, 212, 211, 245, 244, 243, 242, 235, 234, 247, 246, 241, 240, 273, 272, 277, 276, 271, 270, 279, 278, 287, 286, 291, 290, 313, 312, 311, 310, 309,
1,476,591✔
124
        308, 315, 314, 301, 296, 323, 322, 328, 327, 337, 336, 434, 343, 342, 349, 348, 347, 346, 345, 344, 353, 352, 365, 364, 373, 372, 369, 368, 375, 383, 382, 400, 399, 398, 397, 586, 585, 425, 424, 442, 441, 446, 445, 444, 443, 456, 455, 458, 457, 462, 461, 460, 459, 477, 476, 475, 474, 489, 488, 505, 504, 503, 502, 501, 500, 507, 506, 549, 548, 509, 508, 533, 532, 543, 542, 545, 559, 558, 561, 560, 563, 562, 471, 183, 185, 187, 186, 189, 188, 206, 205, 204, 226, 215, 214, 213, 218, 257, 258, 259,
1,476,591✔
125
        265, 264, 282, 283, 292, 321, 316, 339, 338, 350, 354, 361, 374, 376, 405, 421, 420, 423, 422, 431, 430, 440, 468, 467, 466, 469, 480, 479, 478, 481, 524, 523, 525, 528, 553, 552, 565, 564, 571, 579, 578, 580, 135, 142, 141, 589, 534, 85, 86, 87, 71, 225, 224, 223, 357, 356, 355, 380, 379, 378, 159, 158, 307, 306, 396, 395, 499, 498, 518, 517, 512, 511, 516, 515, 514, 513, 256, 174, 173, 170, 169, 573, 572, 281, 280, 275, 274, 335, 334, 404, 403, 415, 414, 577, 576, 329, 222, 221, 220, 269,
1,476,591✔
126
        268, 293, 535, 367, 366, 172, 171, 180, 179, 411, 410, 176, 175, 178, 177, 253, 252, 255, 254, 318, 317, 320, 319, 417, 416, 419, 418, 450, 449, 452, 451, 520, 519, 522, 521, 464, 463, 483, 482, 261, 260, 289, 288, 377, 227, 427, 426, 567, 566, 155, 154, 249, 248, 409, 408, 413, 412, 392, 391, 407, 406, 547, 546, 358, 381, 485, 326, 219, 437, 168, 203, 202, 351, 484, 465, 568, 591, 590, 184, 510, 529, 251, 250, 331, 330, 436, 435, 448, 447, 551, 550
1,476,591✔
127
    };
1,476,591✔
128
    // clang-format on
744,447✔
129

744,447✔
130
    // Core-only method. Compares in us_EN locale (sorting may be slightly inaccurate in some countries). Will
744,447✔
131
    // return arbitrary return value for invalid utf8 (silent error treatment). If one or both strings have
744,447✔
132
    // unicodes beyond 'Latin Extended 2' (0...591), then the strings are compared by unicode value.
744,447✔
133
    uint32_t char1;
1,476,591✔
134
    uint32_t char2;
1,476,591✔
135
    do {
190,463,604✔
136
        size_t remaining1 = string1.size() - (s1 - string1.data());
190,463,604✔
137
        size_t remaining2 = string2.size() - (s2 - string2.data());
190,463,604✔
138

95,115,294✔
139
        if ((remaining1 == 0) != (remaining2 == 0)) {
190,463,604✔
140
            // exactly one of the strings have ended (not both or none; xor)
153,177✔
141
            return (remaining1 == 0);
307,269✔
142
        }
307,269✔
143
        else if (remaining2 == 0 && remaining1 == 0) {
190,156,335✔
144
            // strings are identical
27✔
145
            return false;
54✔
146
        }
54✔
147

94,962,090✔
148
        // invalid utf8
94,962,090✔
149
        if (remaining1 < sequence_length(s1[0]) || remaining2 < sequence_length(s2[0]))
190,156,281✔
150
            return false;
927✔
151

94,961,358✔
152
        char1 = utf8value(s1);
190,155,354✔
153
        char2 = utf8value(s2);
190,155,354✔
154

94,961,358✔
155
        if (char1 == char2) {
190,155,354✔
156
            // Go to next characters for both strings
94,369,686✔
157
            s1 += sequence_length(s1[0]);
188,989,542✔
158
            s2 += sequence_length(s2[0]);
188,989,542✔
159
        }
188,989,542✔
160
        else {
1,165,812✔
161
            // Test if above Latin Extended B
591,672✔
162
            if (char1 > last_latin_extended_2_unicode || char2 > last_latin_extended_2_unicode)
1,165,812✔
163
                return char1 < char2;
53,613✔
164

564,525✔
165
            const uint32_t* internal_collation_order = collation_order_core;
1,112,199✔
166
            uint32_t value1 = internal_collation_order[char1];
1,112,199✔
167
            uint32_t value2 = internal_collation_order[char2];
1,112,199✔
168

564,525✔
169
            return value1 < value2;
1,112,199✔
170
        }
1,112,199✔
171

94,961,358✔
172
    } while (true);
189,581,214✔
173
}
1,476,591✔
174

175
// Converts UTF-8 source into upper or lower case. This function
176
// preserves the byte length of each UTF-8 character in following way:
177
// If an output character differs in size, it is simply substituded by
178
// the original character. This may of course give wrong search
179
// results in very special cases. Todo.
180
util::Optional<std::string> case_map(StringData source, bool upper)
181
{
218,574✔
182
    std::string result;
218,574✔
183
    result.resize(source.size());
218,574✔
184

109,290✔
185
#if defined(_WIN32)
186
    constexpr int tmp_buffer_size = 32;
187
    const char* begin = source.data();
188
    const char* end = begin + source.size();
189
    auto output = result.begin();
190
    while (begin != end) {
191
        auto n = end - begin;
192
        if (n > tmp_buffer_size) {
193
            // Break the input string into chunks - but don't break in the middle of a multibyte character
194
            const char* p = begin;
195
            const char* buffer_end = begin + tmp_buffer_size;
196
            while (p < buffer_end) {
197
                size_t len = sequence_length(*p);
198
                p += len;
199
                if (p > buffer_end) {
200
                    p -= len;
201
                    break;
202
                }
203
            }
204
            n = p - begin;
205
        }
206

207
        wchar_t tmp[tmp_buffer_size];
208

209
        int n2 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, begin, int(n), tmp, tmp_buffer_size);
210
        if (n2 == 0)
211
            return util::none;
212

213
        if (n2 < tmp_buffer_size)
214
            tmp[n2] = 0;
215

216
        // Note: If tmp[0] == 0, it is because the string contains a
217
        // null-chacarcter, which is perfectly fine.
218

219
        wchar_t mapped_tmp[tmp_buffer_size];
220
        LCMapStringEx(LOCALE_NAME_INVARIANT, upper ? LCMAP_UPPERCASE : LCMAP_LOWERCASE, tmp, n2, mapped_tmp,
221
                      tmp_buffer_size, nullptr, nullptr, 0);
222

223
        // FIXME: The intention is to use flag 'WC_ERR_INVALID_CHARS'
224
        // to catch invalid UTF-8. Even though the documentation says
225
        // unambigously that it is supposed to work, it doesn't. When
226
        // the flag is specified, the function fails with error
227
        // ERROR_INVALID_FLAGS.
228
        DWORD flags = 0;
229
        auto m = static_cast<int>(end - begin);
230
        int n3 = WideCharToMultiByte(CP_UTF8, flags, mapped_tmp, n2, &*output, m, 0, 0);
231
        if (n3 == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
232
            return util::none;
233

234
        if (n3 != n) {
235
            realm::safe_copy_n(begin, n, output); // Cannot handle different size, copy source
236
        }
237

238
        begin += n;
239
        output += n;
240
    }
241

242
    return result;
243
#else
244
    size_t sz = source.size();
218,574✔
245
    typedef std::char_traits<char> traits;
218,574✔
246
    for (size_t i = 0; i < sz; ++i) {
18,714,351✔
247
        char c = source[i];
18,495,792✔
248
        auto int_val = traits::to_int_type(c);
18,495,792✔
249

9,247,827✔
250
        auto copy_bytes = [&](size_t n) {
9,247,923✔
251
            if (i + n > sz) {
192✔
252
                return false;
6✔
253
            }
6✔
254
            for (size_t j = 1; j < n; j++) {
600✔
255
                result[i++] = c;
414✔
256
                c = source[i];
414✔
257
                if ((c & 0xC0) != 0x80) {
414✔
UNCOV
258
                    return false;
×
UNCOV
259
                }
×
260
            }
414✔
261
            return true;
186✔
262
        };
186✔
263

9,247,827✔
264
        if (int_val < 0x80) {
18,495,792✔
265
            // Handle ASCII
9,247,407✔
266
            if (upper && (c >= 'a' && c <= 'z')) {
18,494,955✔
267
                c -= 0x20;
16,181,484✔
268
            }
16,181,484✔
269
            else if (!upper && (c >= 'A' && c <= 'Z')) {
2,313,471✔
270
                c += 0x20;
807,156✔
271
            }
807,156✔
272
        }
18,494,955✔
273
        else {
837✔
274
            if ((int_val & 0xE0) == 0xc0) {
837✔
275
                // 2 byte utf-8
318✔
276
                if (i + 2 > sz) {
636✔
UNCOV
277
                    return {};
×
UNCOV
278
                }
×
279
                c = source[i + 1];
636✔
280
                if ((c & 0xC0) != 0x80) {
636✔
UNCOV
281
                    return {};
×
UNCOV
282
                }
×
283
                auto u = ((int_val << 6) + (traits::to_int_type(c) & 0x3F)) & 0x7FF;
636✔
284
                // Handle some Latin-1 supplement characters
318✔
285
                if (upper && (u >= 0xE0 && u <= 0xFE && u != 0xF7)) {
636✔
286
                    u -= 0x20;
270✔
287
                }
270✔
288
                else if (!upper && (u >= 0xC0 && u <= 0xDE && u != 0xD7)) {
366✔
289
                    u += 0x20;
180✔
290
                }
180✔
291

318✔
292
                result[i++] = static_cast<char>((u >> 6) | 0xC0);
636✔
293
                c = static_cast<char>((u & 0x3f) | 0x80);
636✔
294
            }
636✔
295
            else if ((int_val & 0xF0) == 0xE0) {
201✔
296
                // 3 byte utf-8
72✔
297
                if (!copy_bytes(3)) {
144✔
UNCOV
298
                    return {};
×
UNCOV
299
                }
×
300
            }
57✔
301
            else if ((int_val & 0xF8) == 0xF0) {
57✔
302
                // 4 byte utf-8
24✔
303
                if (!copy_bytes(4)) {
48✔
304
                    return {};
6✔
305
                }
6✔
306
            }
9✔
307
            else {
9✔
308
                return {};
9✔
309
            }
9✔
310
        }
18,495,777✔
311
        result[i] = c;
18,495,777✔
312
    }
18,495,777✔
313
    return result;
218,568✔
314
#endif
218,574✔
315
}
218,574✔
316

317
std::string case_map(StringData source, bool upper, IgnoreErrorsTag)
318
{
111,309✔
319
    return case_map(source, upper).value_or("");
111,309✔
320
}
111,309✔
321

322
// If needle == haystack, return true. NOTE: This function first
323
// performs a case insensitive *byte* compare instead of one whole
324
// UTF-8 character at a time. This is very fast, but not enough to
325
// guarantee that the strings are identical, so we need to finish off
326
// with a slower but rigorous comparison. The signature is similar in
327
// spirit to std::equal().
328
bool equal_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower)
329
{
105,432✔
330
    for (size_t i = 0; i != haystack.size(); ++i) {
153,978✔
331
        char c = haystack[i];
67,362✔
332
        if (needle_lower[i] != c && needle_upper[i] != c)
67,362✔
333
            return false;
18,816✔
334
    }
67,362✔
335

52,716✔
336
    const char* begin = haystack.data();
96,024✔
337
    const char* end = begin + haystack.size();
86,616✔
338
    const char* i = begin;
86,616✔
339
    while (i != end) {
108,654✔
340
        if (!equal_sequence(i, end, needle_lower + (i - begin)) &&
22,038✔
341
            !equal_sequence(i, end, needle_upper + (i - begin)))
11,481✔
UNCOV
342
            return false;
×
343
    }
22,038✔
344
    return true;
86,616✔
345
}
86,616✔
346

347

348
// Test if needle is a substring of haystack. The signature is similar
349
// in spirit to std::search().
350
size_t search_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size)
351
{
7,602✔
352
    // FIXME: This solution is very inefficient. Consider deploying the Boyer-Moore algorithm.
3,801✔
353
    size_t i = 0;
7,602✔
354
    while (needle_size <= haystack.size() - i) {
22,782✔
355
        if (equal_case_fold(haystack.substr(i, needle_size), needle_upper, needle_lower)) {
17,334✔
356
            return i;
2,154✔
357
        }
2,154✔
358
        ++i;
15,180✔
359
    }
15,180✔
360
    return haystack.size(); // Not found
6,525✔
361
}
7,602✔
362

363
/// This method takes an array that maps chars (both upper- and lowercase) to distance that can be moved
364
/// (and zero for chars not in needle), allowing the method to apply Boyer-Moore for quick substring search
365
/// The map is calculated in the StringNode<ContainsIns> class (so it can be reused across searches)
366
bool contains_ins(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size,
367
                  const std::array<uint8_t, 256>& charmap)
368
{
7,422✔
369
    if (needle_size == 0)
7,422✔
UNCOV
370
        return haystack.size() != 0;
×
371

3,711✔
372
    // Prepare vars to avoid lookups in loop
3,711✔
373
    size_t last_char_pos = needle_size - 1;
7,422✔
374
    unsigned char lastCharU = needle_upper[last_char_pos];
7,422✔
375
    unsigned char lastCharL = needle_lower[last_char_pos];
7,422✔
376

3,711✔
377
    // Do Boyer-Moore search
3,711✔
378
    size_t p = last_char_pos;
7,422✔
379
    while (p < haystack.size()) {
14,340✔
380
        unsigned char c = haystack.data()[p]; // Get candidate for last char
7,740✔
381

3,870✔
382
        if (c == lastCharU || c == lastCharL) {
7,740✔
383
            StringData candidate = haystack.substr(p - needle_size + 1, needle_size);
870✔
384
            if (equal_case_fold(candidate, needle_upper, needle_lower))
870✔
385
                return true; // text found!
822✔
386
        }
6,918✔
387

3,459✔
388
        // If we don't have a match, see how far we can move char_pos
3,459✔
389
        if (charmap[c] == 0)
6,918✔
390
            p += needle_size; // char was not present in search string
6,696✔
391
        else
222✔
392
            p += charmap[c];
222✔
393
    }
6,918✔
394

3,711✔
395
    return false;
7,011✔
396
}
7,422✔
397

398
// Case insensitive 'like' match of 'text' against a pattern supplied as its pre-computed upper and lower
// case versions. If either 'text' or 'lower' is null, the result is true only when both are null.
bool string_like_ins(StringData text, StringData upper, StringData lower) noexcept
{
    const bool text_is_null = text.is_null();
    const bool pattern_is_null = lower.is_null();
    if (text_is_null || pattern_is_null) {
        return text_is_null && pattern_is_null;
    }

    return StringData::matchlike_ins(text, lower, upper);
}
406

407
// Case insensitive 'like' match of 'text' against 'pattern'. If either argument is null, the result is
// true only when both are null. Errors while case mapping the pattern are ignored (the affected variant
// becomes the empty string).
bool string_like_ins(StringData text, StringData pattern) noexcept
{
    if (text.is_null() || pattern.is_null()) {
        return (text.is_null() && pattern.is_null());
    }

    const std::string upper = case_map(pattern, true, IgnoreErrors);
    const std::string lower = case_map(pattern, false, IgnoreErrors);

    // Pass explicit lengths rather than c_str(): a NUL-terminated view would cut the pattern short at
    // the first embedded null character.
    return StringData::matchlike_ins(text, StringData(lower.data(), lower.size()),
                                     StringData(upper.data(), upper.size()));
}
418

419
} // namespace realm
420

421

422
/*
423
// This is source code for generating the table in utf8_compare() from an allkeys.txt file:
424

425
// Unicodes up to and including 'Latin Extended 2' (0...591)
426

427
std::vector<int64_t> order;
428
order.resize(last_latin_extended_2_unicode + 1);
429
std::string line;
430
std::ifstream myfile("d:/allkeys.txt");
431

432
// Read header text
433
for (size_t t = 0; t < 19; t++)
434
    getline(myfile, line);
435

436
// Read payload
437
for (size_t entry = 0; getline(myfile, line); entry++)
438
{
439
    string str = line.substr(0, 4);
440
    int64_t unicode = std::stoul(str, nullptr, 16);
441
    if (unicode < order.size())
442
    order[unicode] = entry;
443
}
444

445
// Emit something that you can copy/paste into the Core source code in unicode.cpp
446
cout << "static const uint32_t collation_order[] = {";
447
for (size_t t = 0; t < order.size(); t++) {
448
    if (t > 0 && t % 40 == 0)
449
        cout << "\n";
450
    cout << order[t] << (t + 1 < order.size() ? ", " : "");
451
}
452

453
cout << "};";
454
myfile.close();
455
*/
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc