#856

Committed 28 Dec 2022 02:00AM UTC coverage: 86.602% (+0.05%) from 86.55%

Build # #856

Build Type

push

Committed by StellarBot

Commit Message

Merge #6119

6119: Update CMakeLists.txt r=hkaiser a=khuck

updating the default APEX version


Co-authored-by: Kevin Huck <khuck@cs.uoregon.edu>

Run Details

174566 of 201573 relevant lines covered (86.6%)

1876093.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.65

/libs/core/program_options/src/utf8_codecvt_facet.cpp

/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
// utf8_codecvt_facet.ipp

//  Copyright (c) 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
//  Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
//
//  SPDX-License-Identifier: BSL-1.0
//  Use, modification and distribution is subject to the Boost Software
//  License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
//  http://www.boost.org/LICENSE_1_0.txt)

// Please see the comments in <boost/detail/utf8_codecvt_facet.hpp> to
// learn how this file should be used.

#include <hpx/program_options/config.hpp>
#include <hpx/program_options/detail/utf8_codecvt_facet.hpp>

#include <cstddef>
#include <cstdlib>    // for multi-byte conversion routines
#include <limits>

// If we don't have wstring, then Unicode support
// is not available anyway, so we don't need to even
// compile this file.

/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
namespace hpx::program_options::detail {

    // implementation for wchar_t

    // Construct the facet. 'no_locale_manage' is passed through unchanged to
    // the std::codecvt<wchar_t, char, std::mbstate_t> base-class constructor
    // (its reference-count argument); the facet itself holds no other state
    // that is initialized here.
    utf8_codecvt_facet::utf8_codecvt_facet(std::size_t no_locale_manage)
      : std::codecvt<wchar_t, char, std::mbstate_t>(no_locale_manage)
    {
    }

    // Destructor: defaulted here, out of line, in the implementation file.
    utf8_codecvt_facet::~utf8_codecvt_facet() = default;

    // Translate incoming UTF-8 into UCS-4
    std::codecvt_base::result utf8_codecvt_facet::do_in(
        std::mbstate_t& /*state*/, char const* from, char const* from_end,
        char const*& from_next, wchar_t* to, wchar_t* to_end,
        wchar_t*& to_next) const
    {
        // Basic algorithm:  The first octet determines how many
        // octets total make up the UCS-4 character.  The remaining
        // "continuing octets" all begin with "10". To convert, subtract the
        // amount that specifies the number of octets from the first
        // octet.  Subtract 0x80 (1000 0000) from each continuing octet,
        // then mash the whole lot together.  Note that each continuing
        // octet only uses 6 bits as unique values, so only shift by multiples
        // of 6 to combine.
        while (from != from_end && to != to_end)
        {
            // Error checking   on the first octet
            if (invalid_leading_octet(*from))
            {
                from_next = from;
                to_next = to;
                return std::codecvt_base::error;
            }

            // The first octet is   adjusted by a value dependent upon the
            // number   of "continuing octets" encoding the character
            int const cont_octet_count =
                static_cast<int>(get_cont_octet_count(*from));
            const wchar_t octet1_modifier_table[] = {
                0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

            // The unsigned char conversion is necessary in case char is
            // signed   (I learned this the hard way)
            wchar_t ucs_result = (unsigned char) (*from++) -
                octet1_modifier_table[cont_octet_count];

            // Invariants   :
            //   1) At the start of the loop,   'i' continuing characters have been
            //    processed
            //   2) *from   points to the next continuing character to be processed.
            int i = 0;
            while (i != cont_octet_count && from != from_end)
            {
                // Error checking on continuing characters
                if (invalid_continuing_octet(*from))
                {
                    from_next = from;
                    to_next = to;
                    return std::codecvt_base::error;
                }

                ucs_result *= (1 << 6);

                // each continuing character has an extra (10xxxxxx)b attached to
                // it that must be removed.
                ucs_result += (unsigned char) (*from++) - 0x80;
                ++i;
            }

            // If   the buffer ends with an incomplete unicode character...
            if (from == from_end && i != cont_octet_count)
            {
                // rewind "from" to before the current character translation
                from_next = from - (static_cast<std::size_t>(i) + 1);
                to_next = to;
                return std::codecvt_base::partial;
            }
            *to++ = ucs_result;
        }
        from_next = from;
        to_next = to;

        // Were we done converting or did we run out of destination space?
        return from == from_end ? std::codecvt_base::ok :
                                  std::codecvt_base::partial;
    }

    std::codecvt_base::result utf8_codecvt_facet::do_out(
        std::mbstate_t& /*state*/, wchar_t const* from, wchar_t const* from_end,
        wchar_t const*& from_next, char* to, char* to_end, char*& to_next) const
    {
        // RG - consider merging this table with the other one
        const wchar_t octet1_modifier_table[] = {
            0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

        constexpr wchar_t max_wchar = (std::numeric_limits<wchar_t>::max)();
        while (from != from_end && to != to_end)
        {
            // Check for invalid UCS-4 character
            if (*from > max_wchar)
            {
                from_next = from;
                to_next = to;
                return std::codecvt_base::error;
            }

            int cont_octet_count = get_cont_octet_out_count(*from);

            // RG  - comment this formula better
            int shift_exponent = (cont_octet_count) *6;

            // Process the first character
            *to++ = static_cast<char>(octet1_modifier_table[cont_octet_count] +
                (unsigned char) (*from / (1 << shift_exponent)));

            // Process the continuation characters
            // Invariants: At   the start of the loop:
            //   1) 'i' continuing octets   have been generated
            //   2) '*to'   points to the next location to place an octet
            //   3) shift_exponent is   6 more than needed for the next octet
            int i = 0;
            while (i != cont_octet_count && to != to_end)
            {
                shift_exponent -= 6;
                *to++ = static_cast<char>(
                    0x80 + ((*from / (1 << shift_exponent)) % (1 << 6)));
                ++i;
            }
            // If   we filled up the out buffer before encoding the character
            if (to == to_end && i != cont_octet_count)
            {
                from_next = from;
                to_next = to - (static_cast<std::size_t>(i) + 1);
                return std::codecvt_base::partial;
            }
            ++from;
        }
        from_next = from;
        to_next = to;

        // Were we done or did we run out of destination space
        return from == from_end ? std::codecvt_base::ok :
                                  std::codecvt_base::partial;
    }

    // How many char objects can I process to get <= max_limit
    // wchar_t objects?
    int utf8_codecvt_facet::do_length(std::mbstate_t&, char const* from,
        char const* from_end, std::size_t max_limit) const noexcept
    {
        // RG - this code is confusing!  I need a better way to express it.
        // and test cases.

        // Invariants:
        // 1) last_octet_count has the size of the last measured character
        // 2) char_count holds the number of characters shown to fit
        // within the bounds so far (no greater than max_limit)
        // 3) from_next points to the octet 'last_octet_count' before the
        // last measured character.
        std::size_t last_octet_count = 0;
        std::size_t char_count = 0;
        char const* from_next = from;

        // Use "<" because the buffer may represent incomplete characters
        while (
            from_next + last_octet_count <= from_end && char_count <= max_limit)
        {
            from_next += last_octet_count;
            last_octet_count = (get_octet_count(*from_next));
            ++char_count;
        }
        return static_cast<int>(from_next - from);
    }

    unsigned int utf8_codecvt_facet::get_octet_count(
        unsigned char lead_octet) noexcept
    {
        // Maps a lead octet to the total number of octets of its character.
        // The ranges are tested in ascending order, so every test only needs
        // an upper bound.

        // 0x00..0x7f: most significant bit clear, single-octet character
        if (lead_octet < 0x80)
            return 1;

        // 0x80..0xbf: not covered by any of the explicit ranges below, so it
        // gets the same answer as the final catch-all
        if (lead_octet < 0xc0)
            return 6;

        // From here on the result is the number of consecutive 1 bits
        // starting at the most significant bit.
        if (lead_octet < 0xe0)    // 0xc0..0xdf
            return 2;
        if (lead_octet < 0xf0)    // 0xe0..0xef
            return 3;
        if (lead_octet < 0xf8)    // 0xf0..0xf7
            return 4;             // -V112
        if (lead_octet < 0xfc)    // 0xf8..0xfb
            return 5;

        // 0xfc..0xff
        return 6;
    }

    namespace detail {

        // Number of continuation octets needed to encode 'word'. The template
        // argument is the size of wchar_t in bytes; this generic version
        // never reports more than two continuation octets.
        template <std::size_t s>
        int get_cont_octet_out_count_impl(wchar_t word) noexcept
        {
            return word < 0x80 ? 0 : (word < 0x800 ? 1 : 2);
        }

        // Version selected when wchar_t is 4 bytes wide: where WCHAR_MAX
        // allows it, values of 0x10000 and above are mapped to three, four or
        // five continuation octets.
        template <>
        int get_cont_octet_out_count_impl<4>(wchar_t word) noexcept
        {
            if (word < 0x800)
            {
                return word < 0x80 ? 0 : 1;
            }

            // The comparisons against large constants below would trigger
            // warnings on platforms where wchar_t is UCS2. Those warnings are
            // superfluous, as this specialization is never instantiated with
            // such compilers, but they become a problem when warnings are
            // treated as errors, hence the preprocessor guard. Old VC++
            // (7.1 or earlier) is handled separately because its WCHAR_MAX
            // is invalid.
#if defined(_MSC_VER) && _MSC_VER <= 1310    // 7.1 or earlier
            return 2;
#elif WCHAR_MAX > 0x10000
            if (word < 0x10000)
            {
                return 2;
            }
            if (word < 0x200000)
            {
                return 3;
            }
            return word < 0x4000000 ? 4 : 5;
#else
            return 2;
#endif
        }

    }    // namespace detail

    // How many "continuing octets" will be needed for this word
    // ==   total octets - 1.
    int utf8_codecvt_facet::get_cont_octet_out_count(
        wchar_t word) const noexcept
    {
        // Pick the implementation that matches this platform's wchar_t
        // width; the choice is made at compile time.
        constexpr std::size_t wchar_width = sizeof(wchar_t);
        return detail::get_cont_octet_out_count_impl<wchar_width>(word);
    }
}    // namespace hpx::program_options::detail

1	/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2	// utf8_codecvt_facet.ipp
3
4	// Copyright (c) 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
5	// Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
6	//
7	// SPDX-License-Identifier: BSL-1.0
8	// Use, modification and distribution is subject to the Boost Software
9	// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
10	// http://www.boost.org/LICENSE_1_0.txt)
11
12	// Please see the comments in <boost/detail/utf8_codecvt_facet.hpp> to
13	// learn how this file should be used.
14
15	#include <hpx/program_options/config.hpp>
16	#include <hpx/program_options/detail/utf8_codecvt_facet.hpp>
17
18	#include <cstddef>
19	#include <cstdlib> // for multi-byte conversion routines
20	#include <limits>
21
22	// If we don't have wstring, then Unicode support
23	// is not available anyway, so we don't need to even
24	// compile this file.
25
26	/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
27	namespace hpx::program_options::detail {
28
29	// implementation for wchar_t
30
31	utf8_codecvt_facet::utf8_codecvt_facet(std::size_t no_locale_manage)	1,254✔
32	: std::codecvt<wchar_t, char, std::mbstate_t>(no_locale_manage)	1,254✔
33	{	2,508✔
34	}	1,254✔
35
36	utf8_codecvt_facet::~utf8_codecvt_facet() = default;	1,255✔
37
38	// Translate incoming UTF-8 into UCS-4
39	std::codecvt_base::result utf8_codecvt_facet::do_in(	21✔
40	std::mbstate_t& /state/, char const* from, char const* from_end,
41	char const& from_next, wchar_t to, wchar_t* to_end,
42	wchar_t*& to_next) const
43	{
44	// Basic algorithm: The first octet determines how many
45	// octets total make up the UCS-4 character. The remaining
46	// "continuing octets" all begin with "10". To convert, subtract the
47	// amount that specifies the number of octets from the first
48	// octet. Subtract 0x80 (1000 0000) from each continuing octet,
49	// then mash the whole lot together. Note that each continuing
50	// octet only uses 6 bits as unique values, so only shift by multiples
51	// of 6 to combine.
52	while (from != from_end && to != to_end)	88✔
53	{
54	// Error checking on the first octet
55	if (invalid_leading_octet(*from))	67✔
56	{
57	from_next = from;	×
58	to_next = to;	×
59	return std::codecvt_base::error;	×
60	}
61
62	// The first octet is adjusted by a value dependent upon the
63	// number of "continuing octets" encoding the character
64	int const cont_octet_count =	67✔
65	static_cast<int>(get_cont_octet_count(*from));	67✔
66	const wchar_t octet1_modifier_table[] = {	67✔
67	0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
68
69	// The unsigned char conversion is necessary in case char is
70	// signed (I learned this the hard way)
71	wchar_t ucs_result = (unsigned char) (*from++) -	134✔
72	octet1_modifier_table[cont_octet_count];	67✔
73
74	// Invariants :
75	// 1) At the start of the loop, 'i' continuing characters have been
76	// processed
77	// 2) *from points to the next continuing character to be processed.
78	int i = 0;	67✔
79	while (i != cont_octet_count && from != from_end)	81✔
80	{
81	// Error checking on continuing characters
82	if (invalid_continuing_octet(*from))	14✔
83	{
84	from_next = from;	×
85	to_next = to;	×
86	return std::codecvt_base::error;	×
87	}
88
89	ucs_result *= (1 << 6);	14✔
90
91	// each continuing character has an extra (10xxxxxx)b attached to
92	// it that must be removed.
93	ucs_result += (unsigned char) (*from++) - 0x80;	14✔
94	++i;	14✔
95	}
96
97	// If the buffer ends with an incomplete unicode character...
98	if (from == from_end && i != cont_octet_count)	67✔
99	{
100	// rewind "from" to before the current character translation
101	from_next = from - (static_cast<std::size_t>(i) + 1);	×
102	to_next = to;	×
103	return std::codecvt_base::partial;	×
104	}
105	*to++ = ucs_result;	67✔
106	}
107	from_next = from;	21✔
108	to_next = to;	21✔
109
110	// Were we done converting or did we run out of destination space?
111	return from == from_end ? std::codecvt_base::ok :	21✔
112	std::codecvt_base::partial;
113	}	21✔
114
115	std::codecvt_base::result utf8_codecvt_facet::do_out(	10✔
116	std::mbstate_t& /state/, wchar_t const* from, wchar_t const* from_end,
117	wchar_t const& from_next, char to, char* to_end, char*& to_next) const
118	{
119	// RG - consider merging this table with the other one
120	const wchar_t octet1_modifier_table[] = {	10✔
121	0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
122
123	constexpr wchar_t max_wchar = (std::numeric_limits<wchar_t>::max)();	10✔
124	while (from != from_end && to != to_end)	66✔
125	{
126	// Check for invalid UCS-4 character
127	if (*from > max_wchar)	56✔
128	{
129	from_next = from;	×
130	to_next = to;	×
131	return std::codecvt_base::error;	×
132	}
133
134	int cont_octet_count = get_cont_octet_out_count(*from);	56✔
135
136	// RG - comment this formula better
137	int shift_exponent = (cont_octet_count) *6;	56✔
138
139	// Process the first character
140	*to++ = static_cast<char>(octet1_modifier_table[cont_octet_count] +	112✔
141	(unsigned char) (*from / (1 << shift_exponent)));	56✔
142
143	// Process the continuation characters
144	// Invariants: At the start of the loop:
145	// 1) 'i' continuing octets have been generated
146	// 2) '*to' points to the next location to place an octet
147	// 3) shift_exponent is 6 more than needed for the next octet
148	int i = 0;	56✔
149	while (i != cont_octet_count && to != to_end)	63✔
150	{
151	shift_exponent -= 6;	7✔
152	*to++ = static_cast<char>(	7✔
153	0x80 + ((*from / (1 << shift_exponent)) % (1 << 6)));	7✔
154	++i;	7✔
155	}
156	// If we filled up the out buffer before encoding the character
157	if (to == to_end && i != cont_octet_count)	56✔
158	{
159	from_next = from;	×
160	to_next = to - (static_cast<std::size_t>(i) + 1);	×
161	return std::codecvt_base::partial;	×
162	}
163	++from;	56✔
164	}
165	from_next = from;	10✔
166	to_next = to;	10✔
167
168	// Were we done or did we run out of destination space
169	return from == from_end ? std::codecvt_base::ok :	10✔
170	std::codecvt_base::partial;
171	}	10✔
172
173	// How many char objects can I process to get <= max_limit
174	// wchar_t objects?
175	int utf8_codecvt_facet::do_length(std::mbstate_t&, char const* from,	×
176	char const* from_end, std::size_t max_limit) const noexcept
177	{
178	// RG - this code is confusing! I need a better way to express it.
179	// and test cases.
180
181	// Invariants:
182	// 1) last_octet_count has the size of the last measured character
183	// 2) char_count holds the number of characters shown to fit
184	// within the bounds so far (no greater than max_limit)
185	// 3) from_next points to the octet 'last_octet_count' before the
186	// last measured character.
187	std::size_t last_octet_count = 0;	×
188	std::size_t char_count = 0;	×
189	char const* from_next = from;	×
190
191	// Use "<" because the buffer may represent incomplete characters
192	while (	×
193	from_next + last_octet_count <= from_end && char_count <= max_limit)	×
194	{
195	from_next += last_octet_count;	×
196	last_octet_count = (get_octet_count(*from_next));	×
197	++char_count;	×
198	}
199	return static_cast<int>(from_next - from);	×
200	}
201
202	unsigned int utf8_codecvt_facet::get_octet_count(	67✔
203	unsigned char lead_octet) noexcept
204	{
205	// if the 0-bit (MSB) is 0, then 1 character
206	if (lead_octet <= 0x7f)	67✔
207	return 1;	55✔
208
209	// Otherwise the count number of consecutive 1 bits starting at MSB
210	// assert(0xc0 <= lead_octet && lead_octet <= 0xfd);
211
212	if (0xc0 <= lead_octet && lead_octet <= 0xdf)	12✔
213	return 2;	10✔
214	else if (0xe0 <= lead_octet && lead_octet <= 0xef)	2✔
215	return 3;	2✔
216	else if (0xf0 <= lead_octet && lead_octet <= 0xf7)	×
217	return 4; // -V112	×
218	else if (0xf8 <= lead_octet && lead_octet <= 0xfb)	×
219	return 5;	×
220	else
221	return 6;	×
222	}	67✔
223
224	namespace detail {
225
226	template <std::size_t s>
227	int get_cont_octet_out_count_impl(wchar_t word) noexcept
228	{
229	if (word < 0x80)
230	{
231	return 0;
232	}
233	if (word < 0x800)
234	{
235	return 1;
236	}
237	return 2;
238	}
239
240	template <>
241	int get_cont_octet_out_count_impl<4>(wchar_t word) noexcept	56✔
242	{
243	if (word < 0x80)	56✔
244	{
245	return 0;	50✔
246	}
247	if (word < 0x800)	6✔
248	{
249	return 1;	5✔
250	}
251
252	// Note that the following code will generate warnings on some platforms
253	// where wchar_t is defined as UCS2. The warnings are superfluous as the
254	// specialization is never instantitiated with such compilers, but this
255	// can cause problems if warnings are being treated as errors, so we guard
256	// against that. Including <boost/detail/utf8_codecvt_facet.hpp> as we do
257	// should be enough to get WCHAR_MAX defined.
258	// cope with VC++ 7.1 or earlier having invalid WCHAR_MAX
259	#if defined(_MSC_VER) && _MSC_VER <= 1310 // 7.1 or earlier
260	return 2;
261	#elif WCHAR_MAX > 0x10000
262
263	if (word < 0x10000)	1✔
264	{
265	return 2;	1✔
266	}
267	if (word < 0x200000)	×
268	{
269	return 3;	×
270	}
271	if (word < 0x4000000)	×
272	{
273	return 4;	×
274	}
275	return 5;	×
276
277	#else
278	return 2;
279	#endif
280	}	56✔
281
282	} // namespace detail
283
284	// How many "continuing octets" will be needed for this word
285	// == total octets - 1.
286	int utf8_codecvt_facet::get_cont_octet_out_count(	56✔
287	wchar_t word) const noexcept
288	{
289	return detail::get_cont_octet_out_count_impl<sizeof(wchar_t)>(word);	56✔
290	}
291	} // namespace hpx::program_options::detail

STEllAR-GROUP / hpx / #856

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous