thomas.goyne_478

Committed 02 Aug 2024 05:19PM UTC coverage: 91.089% (-0.01%) from 91.1%

Build # thomas.goyne_478

Build Type

Pull #7944

Evergreen

Committed by

tgoyne

Commit Message

Only track pending client resets done by the same core version

If the previous attempt at performing a client reset was done with a different
core version then we should retry the client reset as the new version may have
fixed a bug that made the previous attempt fail (or may be a downgrade to a
version before when the bug was introduced). This also simplifies the tracking
as it means that we don't need to be able to read trackers created by different
versions.

This also means that we can freely change the schema of the table, which this
takes advantage of to drop the unused primary key and make the error required,
as we never actually stored null and the code reading it would have crashed if
it encountered a null error.

Pull Request Pull Request #7944: Only track pending client resets done by the same core version

Run Details

102704 of 181534 branches covered (56.58%)

138 of 153 new or added lines in 10 files covered. (90.2%)

85 existing lines in 16 files now uncovered.

216717 of 237917 relevant lines covered (91.09%)

5947762.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.83

/src/realm/unicode.cpp

/*************************************************************************
 *
 * Copyright 2016 Realm Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **************************************************************************/

#include <realm/unicode.hpp>

#include <algorithm>
#include <clocale>
#include <vector>

#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#else
#include <ctype.h>
#endif

namespace realm {

// clang-format off
// Maps the leading byte of a UTF-8 sequence to the total number of bytes that
// sequence occupies (1 to 6). Bytes that cannot start a multi-byte sequence
// (0x80 - 0xBF, 0xFE and 0xFF) are reported as length 1.
size_t sequence_length(char lead)
{
    // One entry per possible byte value, 16 entries per row. The table is kept
    // 'static' so it is not rebuilt on the stack on every call.
    static const unsigned char length_by_lead[256] = {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 - 0x0F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 - 0x1F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 0x2F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 - 0x3F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 - 0x5F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 - 0x7F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 - 0x8F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 - 0x9F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xA0 - 0xAF
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xB0 - 0xBF
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0 - 0xCF
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0 - 0xDF
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0 - 0xEF
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, // 0xF0 - 0xFF
    };

    // Index through unsigned char so bytes >= 0x80 do not become negative.
    const unsigned char index = static_cast<unsigned char>(lead);
    return length_by_lead[index];
}
// clang-format on

// Compares the UTF-8 sequence starting at 'begin' (bounded by 'end') with the
// bytes starting at 'begin2'. On a match, 'begin' is moved past the compared
// sequence and true is returned; otherwise 'begin' is left untouched and
// false is returned. 'begin2' is not bounds checked; the caller must make
// sure it is at least as long as the sequence being compared.
bool equal_sequence(const char*& begin, const char* end, const char* begin2)
{
    const char* lhs = begin;
    const char* rhs = begin2;

    // The leading bytes must agree before anything else is looked at.
    if (*lhs != *rhs)
        return false;

    const bool is_multibyte = (static_cast<unsigned char>(*lhs) & 0x80) != 0;
    ++lhs;
    ++rhs;

    if (is_multibyte) {
        // Every following byte of the form '10xxxxxx' is treated as belonging
        // to this character.
        for (; lhs != end; ++lhs, ++rhs) {
            const bool is_continuation = (static_cast<unsigned char>(*lhs) & 0xC0) == 0x80;
            if (!is_continuation)
                break;
            if (*lhs != *rhs)
                return false;
        }
    }

    // Commit the new position only once the whole sequence has matched.
    begin = lhs;
    return true;
}

// Decodes the UTF-8 sequence starting at 'character' into its numeric value.
// The input is NOT validated and the number of bytes read is taken from the
// leading byte alone, so this may read out of bounds on malformed or
// truncated input. The caller must check beforehand.
uint32_t utf8value(const char* character)
{
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(character);
    const size_t width = sequence_length(bytes[0]);

    // Single-byte sequences carry their value directly.
    if (width == 1)
        return bytes[0];

    // Strip the length marker bits from the leading byte; the longer the
    // sequence, the fewer payload bits the leading byte holds.
    uint32_t code_point = bytes[0] & (0x3f >> (width - 1));

    // Every following byte contributes its low six bits.
    const unsigned char* cursor = bytes + 1;
    for (size_t remaining = width - 1; remaining != 0; --remaining, ++cursor)
        code_point = (code_point << 6) | (*cursor & 0x3f);

    return code_point;
}

// Converts UTF-8 `source` to upper case (when `upper` is true) or lower case
// (when `upper` is false).
//
// The result always has exactly as many bytes as `source`. Where a mapped
// character would need a different number of bytes than the original, the
// original bytes are kept instead. This may of course give wrong search
// results in very special cases. Todo.
//
// Returns an Optional without a value if `source` is rejected as invalid
// UTF-8.
//
// There are two implementations, selected at compile time:
//  - Windows: the input is processed in chunks of at most 32 bytes through
//    MultiByteToWideChar / LCMapStringEx / WideCharToMultiByte.
//  - Elsewhere: only ASCII letters and the Latin-1 Supplement letters
//    (U+00C0..U+00DE <-> U+00E0..U+00FE, excluding U+00D7 and U+00F7) are
//    mapped; 3- and 4-byte sequences are copied through unchanged.
util::Optional<std::string> case_map(StringData source, bool upper)
{
    std::string result;
    // The output is written in place, so it is sized up front to match the input.
    result.resize(source.size());

#if defined(_WIN32)
    // Capacity of the wide-character scratch buffers, and also the maximum
    // number of source bytes converted per loop iteration.
    constexpr int tmp_buffer_size = 32;
    const char* begin = source.data();
    const char* end = begin + source.size();
    auto output = result.begin();
    while (begin != end) {
        // n is the number of source bytes handled in this iteration.
        auto n = end - begin;
        if (n > tmp_buffer_size) {
            // Break the input string into chunks - but don't break in the middle of a multibyte character
            const char* p = begin;
            const char* buffer_end = begin + tmp_buffer_size;
            while (p < buffer_end) {
                size_t len = sequence_length(*p);
                p += len;
                if (p > buffer_end) {
                    // This character would straddle the chunk boundary; leave
                    // it for the next chunk.
                    p -= len;
                    break;
                }
            }
            n = p - begin;
        }

        wchar_t tmp[tmp_buffer_size];

        // MB_ERR_INVALID_CHARS makes the conversion fail (return 0) on
        // invalid input, which is reported to the caller as "no value".
        int n2 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, begin, int(n), tmp, tmp_buffer_size);
        if (n2 == 0)
            return util::none;

        if (n2 < tmp_buffer_size)
            tmp[n2] = 0;

        // Note: If tmp[0] == 0, it is because the string contains a
        // null-character, which is perfectly fine.

        // NOTE(review): the return value of LCMapStringEx is not checked here.
        wchar_t mapped_tmp[tmp_buffer_size];
        LCMapStringEx(LOCALE_NAME_INVARIANT, upper ? LCMAP_UPPERCASE : LCMAP_LOWERCASE, tmp, n2, mapped_tmp,
                      tmp_buffer_size, nullptr, nullptr, 0);

        // FIXME: The intention is to use flag 'WC_ERR_INVALID_CHARS'
        // to catch invalid UTF-8. Even though the documentation says
        // unambiguously that it is supposed to work, it doesn't. When
        // the flag is specified, the function fails with error
        // ERROR_INVALID_FLAGS.
        DWORD flags = 0;
        // m is the space left in the output from the current position to its end.
        auto m = static_cast<int>(end - begin);
        int n3 = WideCharToMultiByte(CP_UTF8, flags, mapped_tmp, n2, &*output, m, 0, 0);
        if (n3 == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
            return util::none;

        // The mapped chunk must occupy exactly as many bytes as the source
        // chunk; otherwise the whole chunk falls back to the unmapped source.
        if (n3 != n) {
            realm::safe_copy_n(begin, n, output); // Cannot handle different size, copy source
        }

        begin += n;
        output += n;
    }

    return result;
#else
    size_t sz = source.size();
    typedef std::char_traits<char> traits;
    // Each iteration handles one whole UTF-8 sequence. `i` indexes the byte
    // being processed and `c` holds the byte that will be stored at
    // `result[i]` by the shared assignment at the bottom of the loop.
    for (size_t i = 0; i < sz; ++i) {
        char c = source[i];
        auto int_val = traits::to_int_type(c);

        // Copies the first n - 1 bytes of the n-byte sequence starting at i
        // into result unchanged, checking that each following byte is a
        // continuation byte (10xxxxxx). On success, i is left on the last byte
        // of the sequence and c holds that byte, so the shared
        // `result[i] = c` below stores it. Returns false if the sequence runs
        // past the end of the input or contains a non-continuation byte.
        auto copy_bytes = [&](size_t n) {
            if (i + n > sz) {
                return false;
            }
            for (size_t j = 1; j < n; j++) {
                result[i++] = c;
                c = source[i];
                if ((c & 0xC0) != 0x80) {
                    return false;
                }
            }
            return true;
        };

        if (int_val < 0x80) {
            // Handle ASCII
            if (upper && (c >= 'a' && c <= 'z')) {
                c -= 0x20;
            }
            else if (!upper && (c >= 'A' && c <= 'Z')) {
                c += 0x20;
            }
        }
        else {
            if ((int_val & 0xE0) == 0xc0) {
                // 2 byte utf-8
                if (i + 2 > sz) {
                    // The sequence is cut off by the end of the input.
                    return {};
                }
                c = source[i + 1];
                if ((c & 0xC0) != 0x80) {
                    // The second byte is not a continuation byte.
                    return {};
                }
                // Decode the two bytes into an 11-bit value.
                auto u = ((int_val << 6) + (traits::to_int_type(c) & 0x3F)) & 0x7FF;
                // Handle some Latin-1 supplement characters
                // (0xF7 and 0xD7 are excluded from the ranges below)
                if (upper && (u >= 0xE0 && u <= 0xFE && u != 0xF7)) {
                    u -= 0x20;
                }
                else if (!upper && (u >= 0xC0 && u <= 0xDE && u != 0xD7)) {
                    u += 0x20;
                }

                // Re-encode as two bytes: the first is stored here, the second
                // is left in c for the shared assignment below.
                result[i++] = static_cast<char>((u >> 6) | 0xC0);
                c = static_cast<char>((u & 0x3f) | 0x80);
            }
            else if ((int_val & 0xF0) == 0xE0) {
                // 3 byte utf-8 - copied through unchanged
                if (!copy_bytes(3)) {
                    return {};
                }
            }
            else if ((int_val & 0xF8) == 0xF0) {
                // 4 byte utf-8 - copied through unchanged
                if (!copy_bytes(4)) {
                    return {};
                }
            }
            else {
                // Not a valid leading byte for a 2-, 3- or 4-byte sequence.
                return {};
            }
        }
        // Store the (possibly mapped) final byte of the current sequence.
        result[i] = c;
    }
    return result;
#endif
}

// Error-ignoring variant of case_map(): yields the mapped string when the
// conversion succeeds and an empty string when it does not.
std::string case_map(StringData source, bool upper, IgnoreErrorsTag)
{
    if (auto mapped = case_map(source, upper)) {
        return std::move(*mapped);
    }
    return std::string("");
}

// Returns true if 'haystack' equals the needle when case is ignored. The
// needle is supplied pre-folded as 'needle_upper' and 'needle_lower', both of
// which must be at least haystack.size() bytes long.
//
// The check runs in two passes. The first is a cheap case insensitive *byte*
// compare, which rejects most mismatches quickly but is not enough on its own
// to guarantee that the strings are identical. The second walks the haystack
// one whole UTF-8 character at a time and requires each character to match
// either the lower or the upper variant in full. The signature is similar in
// spirit to std::equal().
bool equal_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower)
{
    const size_t length = haystack.size();

    // Pass 1: every byte must match one of the two folded needles.
    for (size_t k = 0; k < length; ++k) {
        const char byte = haystack[k];
        const bool byte_matches = (byte == needle_lower[k]) || (byte == needle_upper[k]);
        if (!byte_matches)
            return false;
    }

    // Pass 2: rigorous, character-wise comparison. equal_sequence() advances
    // 'cursor' past the character whenever it matches.
    const char* const first = haystack.data();
    const char* const last = first + length;
    for (const char* cursor = first; cursor != last;) {
        const auto offset = cursor - first;
        if (equal_sequence(cursor, last, needle_lower + offset))
            continue;
        if (equal_sequence(cursor, last, needle_upper + offset))
            continue;
        return false;
    }
    return true;
}


// Looks for the needle (supplied pre-folded as 'needle_upper' and
// 'needle_lower', each 'needle_size' bytes long) inside 'haystack', ignoring
// case. Returns the byte offset of the first match, or haystack.size() if
// there is none. The signature is similar in spirit to std::search().
size_t search_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size)
{
    // FIXME: This solution is very inefficient. Consider deploying the Boyer-Moore algorithm.
    const size_t haystack_size = haystack.size();

    // A needle longer than the haystack can never match.
    if (needle_size > haystack_size)
        return haystack_size;

    // Try every start position at which the needle still fits.
    const size_t last_start = haystack_size - needle_size;
    for (size_t start = 0; start <= last_start; ++start) {
        const StringData window = haystack.substr(start, needle_size);
        if (equal_case_fold(window, needle_upper, needle_lower))
            return start;
    }
    return haystack_size; // Not found
}

/// Case insensitive substring test using a Boyer-Moore style skip table.
/// 'charmap' maps each byte value (covering both the upper- and lowercase needle) to the
/// distance the search position may be advanced, with zero meaning the byte is not in the needle.
/// The map is calculated in the StringNode<ContainsIns> class (so it can be reused across searches)
bool contains_ins(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size,
                  const std::array<uint8_t, 256>& charmap)
{
    // An empty needle is reported as found exactly when the haystack is non-empty.
    if (needle_size == 0)
        return haystack.size() != 0;

    // Cache everything the loop needs so nothing is looked up repeatedly.
    const size_t tail_index = needle_size - 1;
    const unsigned char tail_upper = needle_upper[tail_index];
    const unsigned char tail_lower = needle_lower[tail_index];

    // 'pos' always points at the haystack byte aligned with the needle's last byte.
    for (size_t pos = tail_index; pos < haystack.size();) {
        const unsigned char probe = haystack.data()[pos];

        // Only bother with a full comparison when the last byte already fits.
        const bool tail_matches = (probe == tail_upper) || (probe == tail_lower);
        if (tail_matches) {
            const StringData window = haystack.substr(pos - tail_index, needle_size);
            if (equal_case_fold(window, needle_upper, needle_lower))
                return true; // text found!
        }

        // No match here: a zero entry means the byte does not occur in the
        // needle, so the whole needle length can be skipped.
        const uint8_t skip = charmap[probe];
        pos += (skip == 0) ? needle_size : skip;
    }

    return false;
}

// Case insensitive LIKE match of 'text' against a pattern supplied pre-folded
// as 'upper' and 'lower'. If either 'text' or 'lower' is null, the result is
// true only when both are null.
bool string_like_ins(StringData text, StringData upper, StringData lower) noexcept
{
    const bool text_is_null = text.is_null();
    const bool pattern_is_null = lower.is_null();

    if (!text_is_null && !pattern_is_null) {
        return StringData::matchlike_ins(text, lower, upper);
    }

    // At least one side is null: they only match if both are.
    return text_is_null && pattern_is_null;
}

// Case insensitive LIKE match of 'text' against 'pattern'. The pattern is
// folded to upper and lower case here, ignoring conversion errors (a pattern
// that fails to convert is folded to an empty string). If either argument is
// null, the result is true only when both are null.
bool string_like_ins(StringData text, StringData pattern) noexcept
{
    const bool text_is_null = text.is_null();
    const bool pattern_is_null = pattern.is_null();
    if (text_is_null || pattern_is_null) {
        return text_is_null && pattern_is_null;
    }

    const std::string folded_upper = case_map(pattern, true, IgnoreErrors);
    const std::string folded_lower = case_map(pattern, false, IgnoreErrors);

    // NOTE(review): the folded patterns are handed over as C strings; if
    // StringData measures them up to the first NUL, a pattern containing an
    // embedded NUL would be cut short here - confirm.
    return StringData::matchlike_ins(text, folded_lower.c_str(), folded_upper.c_str());
}

} // namespace realm

1	/*************************************************************************
2	*
3	* Copyright 2016 Realm Inc.
4	*
5	* Licensed under the Apache License, Version 2.0 (the "License");
6	* you may not use this file except in compliance with the License.
7	* You may obtain a copy of the License at
8	*
9	* http://www.apache.org/licenses/LICENSE-2.0
10	*
11	* Unless required by applicable law or agreed to in writing, software
12	* distributed under the License is distributed on an "AS IS" BASIS,
13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14	* See the License for the specific language governing permissions and
15	* limitations under the License.
16	*
17	**************************************************************************/
18
19	#include <realm/unicode.hpp>
20
21	#include <algorithm>
22	#include <clocale>
23	#include <vector>
24
25	#ifdef _WIN32
26	#ifndef NOMINMAX
27	#define NOMINMAX
28	#endif
29	#include <windows.h>
30	#else
31	#include <ctype.h>
32	#endif
33
34	namespace realm {
35
36	// clang-format off
37	// Returns the number of bytes in a UTF-8 sequence whose leading byte is as specified.
38	size_t sequence_length(char lead)
39	{	120,258✔
40	// keep 'static' else entire array will be pushed to stack at each call
41	const static unsigned char lengths[256] = {	120,258✔
42	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	120,258✔
43	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	120,258✔
44	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	120,258✔
45	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1	120,258✔
46	};	120,258✔
47
48	return lengths[static_cast<unsigned char>(lead)];	120,258✔
49	}	120,258✔
50	// clang-format on
51
52	// Check if the next UTF-8 sequence in [begin, end) is identical to
53	// the one beginning at begin2. If it is, 'begin' is advanced
54	// accordingly.
55	bool equal_sequence(const char*& begin, const char* end, const char* begin2)
56	{	28,110✔
57	if (begin[0] != begin2[0])	28,110✔
58	return false;	924✔
59
60	size_t i = 1;	27,186✔
61	if (static_cast<int>(std::char_traits<char>::to_int_type(begin[0])) & 0x80) {	27,186✔
62	// All following bytes matching '10xxxxxx' will be considered
63	// as part of this character.
64	while (begin + i != end) {	108✔
65	if ((static_cast<int>(std::char_traits<char>::to_int_type(begin[i])) & (0x80 + 0x40)) != 0x80)	90✔
66	break;	24✔
67	if (begin[i] != begin2[i])	66✔
68	return false;	×
69	++i;	66✔
70	}	66✔
71	}	42✔
72
73	begin += i;	27,186✔
74	return true;	27,186✔
75	}	27,186✔
76
77	// Translate from utf8 char to unicode. No check for invalid utf8; may read out of bounds! Caller must check.
78	uint32_t utf8value(const char* character)
79	{	×
80	const unsigned char* c = reinterpret_cast<const unsigned char*>(character);	×
81	size_t len = sequence_length(c[0]);	×
82	uint32_t res = c[0];	×
83
84	if (len == 1)	×
85	return res;	×
86
87	res &= (0x3f >> (len - 1));	×
88
89	for (size_t i = 1; i < len; i++)	×
90	res = ((res << 6) | (c[i] & 0x3f));	×
91
92	return res;	×
93	}	×
94
95	// Converts UTF-8 source into upper or lower case. This function
96	// preserves the byte length of each UTF-8 character in following way:
97	// If an output character differs in size, it is simply substituded by
98	// the original character. This may of course give wrong search
99	// results in very special cases. Todo.
100	util::Optional<std::string> case_map(StringData source, bool upper)
101	{	231,684✔
102	std::string result;	231,684✔
103	result.resize(source.size());	231,684✔
104
105	#if defined(_WIN32)
106	constexpr int tmp_buffer_size = 32;
107	const char* begin = source.data();
108	const char* end = begin + source.size();
109	auto output = result.begin();
110	while (begin != end) {
111	auto n = end - begin;
112	if (n > tmp_buffer_size) {
113	// Break the input string into chunks - but don't break in the middle of a multibyte character
114	const char* p = begin;
115	const char* buffer_end = begin + tmp_buffer_size;
116	while (p < buffer_end) {
117	size_t len = sequence_length(*p);
118	p += len;
119	if (p > buffer_end) {
120	p -= len;
121	break;
122	}
123	}
124	n = p - begin;
125	}
126
127	wchar_t tmp[tmp_buffer_size];
128
129	int n2 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, begin, int(n), tmp, tmp_buffer_size);
130	if (n2 == 0)
131	return util::none;
132
133	if (n2 < tmp_buffer_size)
134	tmp[n2] = 0;
135
136	// Note: If tmp[0] == 0, it is because the string contains a
137	// null-chacarcter, which is perfectly fine.
138
139	wchar_t mapped_tmp[tmp_buffer_size];
140	LCMapStringEx(LOCALE_NAME_INVARIANT, upper ? LCMAP_UPPERCASE : LCMAP_LOWERCASE, tmp, n2, mapped_tmp,
141	tmp_buffer_size, nullptr, nullptr, 0);
142
143	// FIXME: The intention is to use flag 'WC_ERR_INVALID_CHARS'
144	// to catch invalid UTF-8. Even though the documentation says
145	// unambigously that it is supposed to work, it doesn't. When
146	// the flag is specified, the function fails with error
147	// ERROR_INVALID_FLAGS.
148	DWORD flags = 0;
149	auto m = static_cast<int>(end - begin);
150	int n3 = WideCharToMultiByte(CP_UTF8, flags, mapped_tmp, n2, &*output, m, 0, 0);
151	if (n3 == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
152	return util::none;
153
154	if (n3 != n) {
155	realm::safe_copy_n(begin, n, output); // Cannot handle different size, copy source
156	}
157
158	begin += n;
159	output += n;
160	}
161
162	return result;
163	#else
164	size_t sz = source.size();	231,684✔
165	typedef std::char_traits<char> traits;	231,684✔
166	for (size_t i = 0; i < sz; ++i) {	18,821,352✔
167	char c = source[i];	18,589,674✔
168	auto int_val = traits::to_int_type(c);	18,589,674✔
169
170	auto copy_bytes = [&](size_t n) {	18,589,674✔
171	if (i + n > sz) {	192✔
172	return false;	6✔
173	}	6✔
174	for (size_t j = 1; j < n; j++) {	600✔
175	result[i++] = c;	414✔
176	c = source[i];	414✔
177	if ((c & 0xC0) != 0x80) {	414✔
178	return false;	×
179	}	×
180	}	414✔
181	return true;	186✔
182	};	186✔
183
184	if (int_val < 0x80) {	18,589,674✔
185	// Handle ASCII
186	if (upper && (c >= 'a' && c <= 'z')) {	18,588,846✔
187	c -= 0x20;	16,213,008✔
188	}	16,213,008✔
189	else if (!upper && (c >= 'A' && c <= 'Z')) {	2,375,838✔
190	c += 0x20;	810,972✔
191	}	810,972✔
192	}	18,588,846✔
193	else {	828✔
194	if ((int_val & 0xE0) == 0xc0) {	828✔
195	// 2 byte utf-8
196	if (i + 2 > sz) {	636✔
197	return {};	×
198	}	×
199	c = source[i + 1];	636✔
200	if ((c & 0xC0) != 0x80) {	636✔
201	return {};	×
202	}	×
203	auto u = ((int_val << 6) + (traits::to_int_type(c) & 0x3F)) & 0x7FF;	636✔
204	// Handle some Latin-1 supplement characters
205	if (upper && (u >= 0xE0 && u <= 0xFE && u != 0xF7)) {	636✔
206	u -= 0x20;	270✔
207	}	270✔
208	else if (!upper && (u >= 0xC0 && u <= 0xDE && u != 0xD7)) {	366✔
209	u += 0x20;	180✔
210	}	180✔
211
212	result[i++] = static_cast<char>((u >> 6) | 0xC0);	636✔
213	c = static_cast<char>((u & 0x3f) | 0x80);	636✔
214	}	636✔
215	else if ((int_val & 0xF0) == 0xE0) {	192✔
216	// 3 byte utf-8
217	if (!copy_bytes(3)) {	144✔
218	return {};	×
219	}	×
220	}	144✔
221	else if ((int_val & 0xF8) == 0xF0) {	48✔
222	// 4 byte utf-8
223	if (!copy_bytes(4)) {	48✔
224	return {};	6✔
225	}	6✔
226	}	48✔
UNCOV 227	else {	×
UNCOV 228	return {};	×
UNCOV 229	}	×
230	}	828✔
231	result[i] = c;	18,589,668✔
232	}	18,589,668✔
233	return result;	231,678✔
234	#endif	231,684✔
235	}	231,684✔
236
237	std::string case_map(StringData source, bool upper, IgnoreErrorsTag)
238	{	124,416✔
239	return case_map(source, upper).value_or("");	124,416✔
240	}	124,416✔
241
242	// If needle == haystack, return true. NOTE: This function first
243	// performs a case insensitive *byte* compare instead of one whole
244	// UTF-8 character at a time. This is very fast, but not enough to
245	// guarantee that the strings are identical, so we need to finish off
246	// with a slower but rigorous comparison. The signature is similar in
247	// spirit to std::equal().
248	bool equal_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower)
249	{	110,112✔
250	for (size_t i = 0; i != haystack.size(); ++i) {	187,134✔
251	char c = haystack[i];	99,402✔
252	if (needle_lower[i] != c && needle_upper[i] != c)	99,402✔
253	return false;	22,380✔
254	}	99,402✔
255
256	const char* begin = haystack.data();	87,732✔
257	const char* end = begin + haystack.size();	87,732✔
258	const char* i = begin;	87,732✔
259	while (i != end) {	114,918✔
260	if (!equal_sequence(i, end, needle_lower + (i - begin)) &&	27,186✔
261	!equal_sequence(i, end, needle_upper + (i - begin)))	27,186✔
262	return false;	×
263	}	27,186✔
264	return true;	87,732✔
265	}	87,732✔
266
267
268	// Test if needle is a substring of haystack. The signature is similar
269	// in spirit to std::search().
270	size_t search_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size)
271	{	9,318✔
272	// FIXME: This solution is very inefficient. Consider deploying the Boyer-Moore algorithm.
273	size_t i = 0;	9,318✔
274	while (needle_size <= haystack.size() - i) {	25,578✔
275	if (equal_case_fold(haystack.substr(i, needle_size), needle_upper, needle_lower)) {	18,894✔
276	return i;	2,634✔
277	}	2,634✔
278	++i;	16,260✔
279	}	16,260✔
280	return haystack.size(); // Not found	6,684✔
281	}	9,318✔
282
283	/// This method takes an array that maps chars (both upper- and lowercase) to distance that can be moved
284	/// (and zero for chars not in needle), allowing the method to apply Boyer-Moore for quick substring search
285	/// The map is calculated in the StringNode<ContainsIns> class (so it can be reused across searches)
286	bool contains_ins(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size,
287	const std::array<uint8_t, 256>& charmap)
288	{	7,422✔
289	if (needle_size == 0)	7,422✔
290	return haystack.size() != 0;	×
291
292	// Prepare vars to avoid lookups in loop
293	size_t last_char_pos = needle_size - 1;	7,422✔
294	unsigned char lastCharU = needle_upper[last_char_pos];	7,422✔
295	unsigned char lastCharL = needle_lower[last_char_pos];	7,422✔
296
297	// Do Boyer-Moore search
298	size_t p = last_char_pos;	7,422✔
299	while (p < haystack.size()) {	14,340✔
300	unsigned char c = haystack.data()[p]; // Get candidate for last char	7,740✔
301
302	if (c == lastCharU || c == lastCharL) {	7,740✔
303	StringData candidate = haystack.substr(p - needle_size + 1, needle_size);	870✔
304	if (equal_case_fold(candidate, needle_upper, needle_lower))	870✔
305	return true; // text found!	822✔
306	}	870✔
307
308	// If we don't have a match, see how far we can move char_pos
309	if (charmap[c] == 0)	6,918✔
310	p += needle_size; // char was not present in search string	6,696✔
311	else	222✔
312	p += charmap[c];	222✔
313	}	6,918✔
314
315	return false;	6,600✔
316	}	7,422✔
317
318	bool string_like_ins(StringData text, StringData upper, StringData lower) noexcept
319	{	12,294✔
320	if (text.is_null() || lower.is_null()) {	12,294✔
321	return (text.is_null() && lower.is_null());	×
322	}	×
323
324	return StringData::matchlike_ins(text, lower, upper);	12,294✔
325	}	12,294✔
326
327	bool string_like_ins(StringData text, StringData pattern) noexcept
328	{	222✔
329	if (text.is_null() || pattern.is_null()) {	222✔
330	return (text.is_null() && pattern.is_null());	30✔
331	}	30✔
332
333	std::string upper = case_map(pattern, true, IgnoreErrors);	192✔
334	std::string lower = case_map(pattern, false, IgnoreErrors);	192✔
335
336	return StringData::matchlike_ins(text, lower.c_str(), upper.c_str());	192✔
337	}	222✔
338
339	} // namespace realm

realm / realm-core / thomas.goyne_478

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous