• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

STEllAR-GROUP / hpx / #882

31 Aug 2023 07:44PM UTC coverage: 41.798% (-44.7%) from 86.546%
#882

push

19442 of 46514 relevant lines covered (41.8%)

126375.38 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/libs/core/program_options/include/hpx/program_options/detail/utf8_codecvt_facet.hpp
1
// Copyright (c) 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
2
// Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
3
//
4
// SPDX-License-Identifier: BSL-1.0
5
// Distributed under the Boost Software License, Version 1.0. (See accompany-
6
// ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7

8
#pragma once
9

10
/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
11
// utf8_codecvt_facet.hpp
12

13
// This header defines class utf8_codecvt_facet, derived from
14
// std::codecvt<wchar_t, char>, which can be used to convert utf8 data in
15
// files into wchar_t strings in the application.
16
//
17
// The header is NOT STANDALONE, and is not to be included by the USER.
18
// There are at least two libraries which want to use this functionality, and
19
// we want to avoid code duplication. It would be possible to create utf8
20
// library, but:
21
// - this requires review process first
22
// - in the case when linking a library which uses utf8
23
//   (say 'program_options'), user should also link to the utf8 library.
24
//   This seems inconvenient, and asking a user to link to an unreviewed
25
//   library is strange.
26
// Until the above points are fixed, a library which wants to use utf8 must:
27
// - include this header in one of its headers or sources
28
// - include the corresponding boost/detail/utf8_codecvt_facet.ipp file in one
29
//   of its sources
30
// - before including either file, the library must define
31
//   - BOOST_UTF8_BEGIN_NAMESPACE to the namespace declaration that must be used
32
//   - BOOST_UTF8_END_NAMESPACE to the code to close the previous namespace
33
//     declaration.
34
//   - BOOST_UTF8_DECL -- to the code which must be used for all 'exportable'
35
//     symbols.
36
//
37
// For example, program_options library might contain:
38
//    #define BOOST_UTF8_BEGIN_NAMESPACE <backslash character>
39
//             namespace boost { namespace program_options {
40
//    #define BOOST_UTF8_END_NAMESPACE }}
41
//    #define BOOST_UTF8_DECL BOOST_PROGRAM_OPTIONS_DECL
42
//    #include <boost/detail/utf8_codecvt_facet.ipp>
43
//
44
// Essentially, each library will have its own copy of utf8 code, in
45
// different namespaces.
46

47
// Note:(Robert Ramey).  I have made the following alterations in the original
48
// code.
49
// a) Rendered utf8_codecvt<wchar_t, char> without using templates
50
// b) Move longer functions outside class definition to prevent inlining
51
// and make code smaller
52
// c) added on a derived class to permit translation to/from current
53
// locale to utf8
54

55
//  See http://www.boost.org for updates, documentation, and revision history.
56

57
// archives stored as text - note these are templated on the basic
58
// stream templates to accommodate wide (and other?) kind of characters
59
//
60
// note the fact that on libraries without wide characters, ostream is
61
// not a specialization of basic_ostream which in fact is not defined
62
// in such cases.   So we can't use basic_ostream<OStream::char_type> but rather
63
// use two template parameters
64
//
65
// utf8_codecvt_facet
66
//   This is an implementation of a std::codecvt facet for translating
67
//   from UTF-8 externally to UCS-4.  Note that this is not tied to
68
//   any specific types in order to allow customization on platforms
69
//   where wchar_t is not big enough.
70
//
71
// NOTES:  The current implementation jumps through some unpleasant hoops in
72
// order to deal with signed character types.  As a result,
73
// it is necessary  for the ExternType to be convertible to unsigned  char.
74
// I chose not to tie the extern_type explicitly to char. But if any combination
75
// of types other than <wchar_t,char> is used, then std::codecvt must be
76
// specialized on those types for this to work.
77

78
#include <hpx/program_options/config.hpp>
79

80
#include <cstddef>    // for std::size_t
81
#include <cwchar>     // for mbstate_t
82
#include <locale>
83

84
//----------------------------------------------------------------------------//
85
//                                                                            //
86
//                          utf8_codecvt_facet                                //
87
//                                                                            //
88
//            See utf8_codecvt_facet.cpp for the implementation.              //
89
//----------------------------------------------------------------------------//
90

91
namespace hpx::program_options::detail {
92

93
    // std::codecvt facet whose external (char) side is UTF-8 and whose
    // internal side is wchar_t. The conversion routines declared without a
    // body here are defined out of line.
    HPX_CXX_EXPORT struct HPX_CORE_EXPORT utf8_codecvt_facet
94
      : public std::codecvt<wchar_t, char, std::mbstate_t>
95
    {
96
    public:
97
        // NOTE(review): no_locale_manage is presumably forwarded to the
        // std::codecvt base constructor -- the definition is not visible
        // here, confirm against the implementation file.
        explicit utf8_codecvt_facet(std::size_t no_locale_manage = 0);
98
        virtual ~utf8_codecvt_facet();
99

100
    protected:
101
        // Override of std::codecvt::do_in (external char -> internal
        // wchar_t); defined out of line.
        std::codecvt_base::result do_in(std::mbstate_t& state, char const* from,
102
            char const* from_end, char const*& from_next, wchar_t* to,
103
            wchar_t* to_end, wchar_t*& to_next) const override;
104

105
        // Override of std::codecvt::do_out (internal wchar_t -> external
        // char); defined out of line.
        std::codecvt_base::result do_out(std::mbstate_t& state,
106
            wchar_t const* from, wchar_t const* from_end,
107
            wchar_t const*& from_next, char* to, char* to_end,
108
            char*& to_next) const override;
109

110
        // Returns true when the octet lies outside the range 0x80..0xbf,
        // i.e. it cannot be a continuing octet.
        static bool invalid_continuing_octet(unsigned char octet_1) noexcept
111
        {
112
            return (octet_1 < 0x80 || 0xbf < octet_1);
113
        }
114

115
        // Returns true when the octet lies in 0x80..0xbf (the continuing
        // octet range) or is greater than 0xfd.
        static bool invalid_leading_octet(unsigned char octet_1) noexcept
116
        {
117
            return (0x7f < octet_1 && octet_1 < 0xc0) || (octet_1 > 0xfd);
118
        }
119

120
        // continuing octets = octets except for the leading octet
×
121
        static unsigned int get_cont_octet_count(
122
            unsigned char lead_octet) noexcept
123
        {
124
            // Total octet count for this lead octet, minus the lead itself.
            return get_octet_count(lead_octet) - 1;
125
        }
×
126

127
        // Total number of octets in the sequence introduced by lead_octet
        // (leading octet included, see get_cont_octet_count above);
        // defined out of line.
        static unsigned int get_octet_count(unsigned char lead_octet) noexcept;
128

129
        // How many "continuing octets" will be needed for this word
130
        // ==   total octets - 1.
131
        int get_cont_octet_out_count(wchar_t word) const noexcept;
132

×
133
        // Always reports false: this facet never declares itself a
        // no-op conversion.
        bool do_always_noconv() const noexcept override
134
        {
135
            return false;
136
        }
137

138
        // UTF-8 isn't really stateful since we rewind on partial conversions
139
        std::codecvt_base::result do_unshift(std::mbstate_t&, char* from,
140
            char* /*to*/, char*& next) const override
141
        {
×
142
            // Nothing is written: next is set to the first pointer argument.
            next = from;
143
            return ok;
×
144
        }
145

146
        // Returns 0, named here as a variable-byte external encoding.
        int do_encoding() const noexcept override
147
        {
×
148
            constexpr int variable_byte_external_encoding = 0;
149
            return variable_byte_external_encoding;
150
        }
×
151

×
152
        // How many char objects can I process to get <= max_limit
153
        // wchar_t objects?
154
        int do_length(std::mbstate_t&, char const* from, char const* from_end,
×
155
            std::size_t max_limit) const noexcept override;
156

157
        // Nonstandard override
×
158
        // Takes the state by const reference (not marked override); it
        // casts away the const and forwards to the do_length overload above.
        virtual int do_length(std::mbstate_t const& s,    //-V835
159
            char const* from, char const* from_end,
160
            std::size_t max_limit) const noexcept
161
        {
162
            return do_length(
163
                const_cast<std::mbstate_t&>(s), from, from_end, max_limit);
164
        }
165

166
        // Largest possible value do_length(state,from,from_end,1) could return.
×
167
        int do_max_length() const noexcept override
168
        {
169
            return 6;    // largest UTF-8 encoding of a UCS-4 character
170
        }
×
171
    };
×
172
}    // namespace hpx::program_options::detail
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc