• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

STEllAR-GROUP / hpx / #856

28 Dec 2022 02:00AM UTC coverage: 86.602% (+0.05%) from 86.55%
#856

push

StellarBot
Merge #6119

6119: Update CMakeLists.txt r=hkaiser a=khuck

updating the default APEX version


Co-authored-by: Kevin Huck <khuck@cs.uoregon.edu>

174566 of 201573 relevant lines covered (86.6%)

1876093.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.65
/libs/core/program_options/src/utf8_codecvt_facet.cpp
1
/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2
// utf8_codecvt_facet.ipp
3

4
//  Copyright (c) 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
5
//  Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
6
//
7
//  SPDX-License-Identifier: BSL-1.0
8
//  Use, modification and distribution is subject to the Boost Software
9
//  License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
10
//  http://www.boost.org/LICENSE_1_0.txt)
11

12
// Please see the comments in <boost/detail/utf8_codecvt_facet.hpp> to
13
// learn how this file should be used.
14

15
#include <hpx/program_options/config.hpp>
16
#include <hpx/program_options/detail/utf8_codecvt_facet.hpp>
17

18
#include <cstddef>
19
#include <cstdlib>    // for multi-byte conversion routines
20
#include <limits>
21

22
// If we don't have wstring, then Unicode support
23
// is not available anyway, so we don't need to even
24
// compile this file.
25

26
/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
27
namespace hpx::program_options::detail {
28

29
    // implementation for wchar_t
30

31
    // Construct the facet; 'no_locale_manage' is passed through unchanged to
    // the std::codecvt<wchar_t, char, std::mbstate_t> base class constructor.
    utf8_codecvt_facet::utf8_codecvt_facet(std::size_t no_locale_manage)
      : std::codecvt<wchar_t, char, std::mbstate_t>(no_locale_manage)
    {}
1,254✔
35

36
    // Defaulted destructor, defined out of line in this translation unit.
    utf8_codecvt_facet::~utf8_codecvt_facet() = default;
1,255✔
37

38
    // Translate incoming UTF-8 into UCS-4
39
    std::codecvt_base::result utf8_codecvt_facet::do_in(
21✔
40
        std::mbstate_t& /*state*/, char const* from, char const* from_end,
41
        char const*& from_next, wchar_t* to, wchar_t* to_end,
42
        wchar_t*& to_next) const
43
    {
44
        // Basic algorithm:  The first octet determines how many
45
        // octets total make up the UCS-4 character.  The remaining
46
        // "continuing octets" all begin with "10". To convert, subtract the
47
        // amount that specifies the number of octets from the first
48
        // octet.  Subtract 0x80 (1000 0000) from each continuing octet,
49
        // then mash the whole lot together.  Note that each continuing
50
        // octet only uses 6 bits as unique values, so only shift by multiples
51
        // of 6 to combine.
52
        while (from != from_end && to != to_end)
88✔
53
        {
54
            // Error checking   on the first octet
55
            if (invalid_leading_octet(*from))
67✔
56
            {
57
                from_next = from;
×
58
                to_next = to;
×
59
                return std::codecvt_base::error;
×
60
            }
61

62
            // The first octet is   adjusted by a value dependent upon the
63
            // number   of "continuing octets" encoding the character
64
            int const cont_octet_count =
67✔
65
                static_cast<int>(get_cont_octet_count(*from));
67✔
66
            const wchar_t octet1_modifier_table[] = {
67✔
67
                0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
68

69
            // The unsigned char conversion is necessary in case char is
70
            // signed   (I learned this the hard way)
71
            wchar_t ucs_result = (unsigned char) (*from++) -
134✔
72
                octet1_modifier_table[cont_octet_count];
67✔
73

74
            // Invariants   :
75
            //   1) At the start of the loop,   'i' continuing characters have been
76
            //    processed
77
            //   2) *from   points to the next continuing character to be processed.
78
            int i = 0;
67✔
79
            while (i != cont_octet_count && from != from_end)
81✔
80
            {
81
                // Error checking on continuing characters
82
                if (invalid_continuing_octet(*from))
14✔
83
                {
84
                    from_next = from;
×
85
                    to_next = to;
×
86
                    return std::codecvt_base::error;
×
87
                }
88

89
                ucs_result *= (1 << 6);
14✔
90

91
                // each continuing character has an extra (10xxxxxx)b attached to
92
                // it that must be removed.
93
                ucs_result += (unsigned char) (*from++) - 0x80;
14✔
94
                ++i;
14✔
95
            }
96

97
            // If   the buffer ends with an incomplete unicode character...
98
            if (from == from_end && i != cont_octet_count)
67✔
99
            {
100
                // rewind "from" to before the current character translation
101
                from_next = from - (static_cast<std::size_t>(i) + 1);
×
102
                to_next = to;
×
103
                return std::codecvt_base::partial;
×
104
            }
105
            *to++ = ucs_result;
67✔
106
        }
107
        from_next = from;
21✔
108
        to_next = to;
21✔
109

110
        // Were we done converting or did we run out of destination space?
111
        return from == from_end ? std::codecvt_base::ok :
21✔
112
                                  std::codecvt_base::partial;
113
    }
21✔
114

115
    std::codecvt_base::result utf8_codecvt_facet::do_out(
10✔
116
        std::mbstate_t& /*state*/, wchar_t const* from, wchar_t const* from_end,
117
        wchar_t const*& from_next, char* to, char* to_end, char*& to_next) const
118
    {
119
        // RG - consider merging this table with the other one
120
        const wchar_t octet1_modifier_table[] = {
10✔
121
            0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
122

123
        constexpr wchar_t max_wchar = (std::numeric_limits<wchar_t>::max)();
10✔
124
        while (from != from_end && to != to_end)
66✔
125
        {
126
            // Check for invalid UCS-4 character
127
            if (*from > max_wchar)
56✔
128
            {
129
                from_next = from;
×
130
                to_next = to;
×
131
                return std::codecvt_base::error;
×
132
            }
133

134
            int cont_octet_count = get_cont_octet_out_count(*from);
56✔
135

136
            // RG  - comment this formula better
137
            int shift_exponent = (cont_octet_count) *6;
56✔
138

139
            // Process the first character
140
            *to++ = static_cast<char>(octet1_modifier_table[cont_octet_count] +
112✔
141
                (unsigned char) (*from / (1 << shift_exponent)));
56✔
142

143
            // Process the continuation characters
144
            // Invariants: At   the start of the loop:
145
            //   1) 'i' continuing octets   have been generated
146
            //   2) '*to'   points to the next location to place an octet
147
            //   3) shift_exponent is   6 more than needed for the next octet
148
            int i = 0;
56✔
149
            while (i != cont_octet_count && to != to_end)
63✔
150
            {
151
                shift_exponent -= 6;
7✔
152
                *to++ = static_cast<char>(
7✔
153
                    0x80 + ((*from / (1 << shift_exponent)) % (1 << 6)));
7✔
154
                ++i;
7✔
155
            }
156
            // If   we filled up the out buffer before encoding the character
157
            if (to == to_end && i != cont_octet_count)
56✔
158
            {
159
                from_next = from;
×
160
                to_next = to - (static_cast<std::size_t>(i) + 1);
×
161
                return std::codecvt_base::partial;
×
162
            }
163
            ++from;
56✔
164
        }
165
        from_next = from;
10✔
166
        to_next = to;
10✔
167

168
        // Were we done or did we run out of destination space
169
        return from == from_end ? std::codecvt_base::ok :
10✔
170
                                  std::codecvt_base::partial;
171
    }
10✔
172

173
    // How many char objects can I process to get <= max_limit
174
    // wchar_t objects?
175
    int utf8_codecvt_facet::do_length(std::mbstate_t&, char const* from,
×
176
        char const* from_end, std::size_t max_limit) const noexcept
177
    {
178
        // RG - this code is confusing!  I need a better way to express it.
179
        // and test cases.
180

181
        // Invariants:
182
        // 1) last_octet_count has the size of the last measured character
183
        // 2) char_count holds the number of characters shown to fit
184
        // within the bounds so far (no greater than max_limit)
185
        // 3) from_next points to the octet 'last_octet_count' before the
186
        // last measured character.
187
        std::size_t last_octet_count = 0;
×
188
        std::size_t char_count = 0;
×
189
        char const* from_next = from;
×
190

191
        // Use "<" because the buffer may represent incomplete characters
192
        while (
×
193
            from_next + last_octet_count <= from_end && char_count <= max_limit)
×
194
        {
195
            from_next += last_octet_count;
×
196
            last_octet_count = (get_octet_count(*from_next));
×
197
            ++char_count;
×
198
        }
199
        return static_cast<int>(from_next - from);
×
200
    }
201

202
    // Total number of octets in the UTF-8 sequence introduced by lead_octet:
    //   0x00 - 0x7f -> 1
    //   0xc0 - 0xdf -> 2
    //   0xe0 - 0xef -> 3
    //   0xf0 - 0xf7 -> 4
    //   0xf8 - 0xfb -> 5
    //   any other value (0x80 - 0xbf and 0xfc - 0xff) -> 6
    // The argument is not validated here.
    unsigned int utf8_codecvt_facet::get_octet_count(
        unsigned char lead_octet) noexcept
    {
        // The ranges are tested in ascending order, so each test only needs
        // the exclusive upper bound of its range.
        if (lead_octet < 0x80)
        {
            return 1;    // most significant bit clear: single octet
        }
        if (lead_octet < 0xc0)
        {
            return 6;    // 0x80 - 0xbf share the result of the last range
        }
        if (lead_octet < 0xe0)
        {
            return 2;
        }
        if (lead_octet < 0xf0)
        {
            return 3;
        }
        if (lead_octet < 0xf8)
        {
            return 4;    // -V112
        }
        if (lead_octet < 0xfc)
        {
            return 5;
        }
        return 6;
    }
67✔
223

224
    namespace detail {

        // Number of continuation octets (total octets - 1) used to encode
        // 'word' in UTF-8.
        //
        // 's' is the wchar_t width in bytes chosen by the caller. The forms
        // with three or more continuation octets are considered only for
        // s == 4; any other width yields at most two continuation octets.
        //
        //   word <      0x80 -> 0
        //   word <     0x800 -> 1
        //   word <   0x10000 -> 2   (all remaining values unless s == 4)
        //   word <  0x200000 -> 3   (s == 4 only)
        //   word < 0x4000000 -> 4   (s == 4 only)
        //   otherwise        -> 5   (s == 4 only)
        template <std::size_t s>
        int get_cont_octet_out_count_impl(wchar_t word) noexcept
        {
            // Compare a widened copy: this keeps the value and sign of
            // 'word' while avoiding always-true comparison warnings against
            // constants a two byte wchar_t cannot reach. No WCHAR_MAX
            // preprocessor test is needed either.
            long long const code_point = word;

            if (code_point < 0x80)
            {
                return 0;
            }
            if (code_point < 0x800)
            {
                return 1;
            }

            if constexpr (s == 4)
            {
                if (code_point < 0x10000)
                {
                    return 2;
                }
                if (code_point < 0x200000)
                {
                    return 3;
                }
                if (code_point < 0x4000000)
                {
                    return 4;    // -V112
                }
                return 5;
            }
            else
            {
                return 2;
            }
        }

    }    // namespace detail
283

284
    // How many "continuing octets" will be needed for this word
285
    // ==   total octets - 1.
286
    int utf8_codecvt_facet::get_cont_octet_out_count(
56✔
287
        wchar_t word) const noexcept
288
    {
289
        return detail::get_cont_octet_out_count_impl<sizeof(wchar_t)>(word);
56✔
290
    }
291
}    // namespace hpx::program_options::detail
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc