#848

Committed 07 Dec 2022 11:00PM CUT coverage: 86.456% (+0.6%) from 85.835%

Build # #848

Build Type

push

Committed by StellarBot

Commit Message

Merge #6096

6096: Forking Boost.Tokenizer r=hkaiser a=hkaiser

- flyby: remove more Boost headers that are not needed anymore

Working towards #3440 

Co-authored-by: Hartmut Kaiser <hartmut.kaiser@gmail.com>

Run Details

525 of 525 new or added lines in 20 files covered. (100.0%)

173087 of 200202 relevant lines covered (86.46%)

1845223.38 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.68

/libs/core/string_util/include/hpx/string_util/token_functions.hpp

//  Copyright (c) 2022 Hartmut Kaiser
//
//  SPDX-License-Identifier: BSL-1.0
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// Copyright John R. Bandela 2001.

// See http://www.boost.org/libs/tokenizer/ for documentation.

// Revision History:
// 01 Oct 2004   Joaquin M Lopez Munoz
//      Workaround for a problem with string::assign in msvc-stlport
// 06 Apr 2004   John Bandela
//      Fixed a bug involving using char_delimiter with a true input iterator
// 28 Nov 2003   Robert Zeh and John Bandela
//      Converted into "fast" functions that avoid using += when
//      the supplied iterator isn't an input_iterator; based on
//      some work done at Archelon and a version that was checked into
//      the boost CVS for a short period of time.
// 20 Feb 2002   John Maddock
//      Removed using namespace std declarations and added
//      workaround for BOOST_NO_STDC_NAMESPACE (the library
//      can be safely mixed with regex).
// 06 Feb 2002   Jeremy Siek
//      Added char_separator.
// 02 Feb 2002   Jeremy Siek
//      Removed tabs and a little cleanup.

#pragma once

#include <hpx/config.hpp>
#include <hpx/assert.hpp>
#include <hpx/modules/errors.hpp>

#include <algorithm>    // for find_if
#include <cctype>
#include <cwctype>
#include <initializer_list>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>

namespace hpx::string_util {

    //=========================================================================
    // escaped_list_separator is a model of TokenizerFunction.
    //
    // An escaped list is a super-set of what is commonly known as a comma
    // separated value (csv) list. The input is split into fields at every
    // separator character. A separator that appears inside quotes is treated
    // as a regular character. Escape sequences (much like in C) allow
    // embedding quotes, separators, escape characters and newlines ("\n")
    // into a field. The sets of escape, separator and quote characters are
    // all configurable.
    template <typename Char,
        typename Traits = typename std::basic_string<Char>::traits_type>
    class escaped_list_separator
    {
    private:
        using string_type = std::basic_string<Char, Traits>;

        string_type escape_;    // characters that start an escape sequence
        string_type c_;         // field separator characters
        string_type quote_;     // quote characters
        bool last_ = false;     // true if the previous field ended at a separator

        // Returns true if 'ch' compares equal (using Traits::eq) to any of
        // the characters stored in 'set'.
        static bool contains(string_type const& set, Char ch)
        {
            return std::any_of(set.begin(), set.end(),
                [ch](Char candidate) { return Traits::eq(ch, candidate); });
        }

        bool is_escape(Char e) const
        {
            return contains(escape_, e);
        }

        bool is_c(Char e) const
        {
            return contains(c_, e);
        }

        bool is_quote(Char e) const
        {
            return contains(quote_, e);
        }

        // Consumes the character following an escape character and appends
        // its meaning to 'tok'. On entry 'next' refers to the escape
        // character, on exit it refers to the escaped character.
        //
        // Throws (invalid_status) if the input ends right after the escape
        // character or if the escaped character is neither 'n', a quote, a
        // separator, nor an escape character.
        template <typename iterator, typename Token>
        void do_escape(iterator& next, iterator end, Token& tok)
        {
            ++next;
            if (next == end)
            {
                HPX_THROW_EXCEPTION(invalid_status,
                    "escaped_list_separator::do_escape",
                    "cannot end with escape");
            }

            // "\n" always stands for a newline
            if (Traits::eq(*next, 'n'))
            {
                tok += '\n';
                return;
            }

            // everything else has to be one of the special characters
            if (!is_quote(*next) && !is_c(*next) && !is_escape(*next))
            {
                HPX_THROW_EXCEPTION(invalid_status,
                    "escaped_list_separator::do_escape",
                    "unknown escape sequence");
            }

            tok += *next;
        }

    public:
        // Construct from single escape, separator, and quote characters.
        explicit escaped_list_separator(
            Char e = '\\', Char c = ',', Char q = '\"')
          : escape_(1, e)
          , c_(1, c)
          , quote_(1, q)
        {
        }

        // Construct from sets of escape, separator, and quote characters.
        escaped_list_separator(
            string_type e, string_type c, string_type q) noexcept
          : escape_(HPX_MOVE(e))
          , c_(HPX_MOVE(c))
          , quote_(HPX_MOVE(q))
        {
        }

        // Forget about any pending trailing empty field.
        void reset() noexcept
        {
            last_ = false;
        }

        // Extracts the next field from [next, end) into 'tok' and advances
        // 'next' past the consumed characters (including the separator).
        // Returns false once no more fields are available.
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            tok = Token();

            if (next == end)
            {
                // A separator directly before the end of the input produces
                // exactly one additional (empty) field.
                bool const pending_empty_field = last_;
                last_ = false;
                return pending_empty_field;
            }

            last_ = false;

            bool in_quotes = false;
            while (next != end)
            {
                if (is_escape(*next))
                {
                    do_escape(next, end, tok);
                }
                else if (is_c(*next))
                {
                    if (!in_quotes)
                    {
                        // An unquoted separator terminates the field. Step
                        // over it and remember that another (possibly empty)
                        // field has to follow.
                        ++next;
                        last_ = true;
                        return true;
                    }

                    // quoted separators are ordinary characters
                    tok += *next;
                }
                else if (is_quote(*next))
                {
                    in_quotes = !in_quotes;
                }
                else
                {
                    tok += *next;
                }

                ++next;
            }

            return true;
        }
    };

    //=========================================================================
    // The classes here are used by offset_separator and char_separator to
    // implement faster assigning of tokens using assign instead of +=

    namespace detail {

        //=====================================================================
        // Character classification helpers used by the separators below.
        //
        // The narrow classifiers (std::isspace, std::ispunct) only accept
        // values representable as unsigned char (or EOF); passing anything
        // else is undefined behavior (and asserts in MSVC debug builds). The
        // implementation is therefore selected based on the size N of the
        // character type: single byte characters go through the <cctype>
        // functions (after conversion to unsigned char), all wider
        // characters go through the <cwctype> functions.
        template <typename Traits, int N>
        struct traits_extension_details : public Traits
        {
            using char_type = typename Traits::char_type;

            // Returns true if the (wide) character c is whitespace.
            static bool isspace(char_type c) noexcept
            {
                return std::iswspace(static_cast<std::wint_t>(c)) != 0;
            }

            // Returns true if the (wide) character c is punctuation.
            static bool ispunct(char_type c) noexcept
            {
                return std::iswpunct(static_cast<std::wint_t>(c)) != 0;
            }
        };

        // Specialization for single byte character types.
        template <typename Traits>
        struct traits_extension_details<Traits, 1> : public Traits
        {
            using char_type = typename Traits::char_type;

            // Returns true if the character c is whitespace.
            static bool isspace(char_type c) noexcept
            {
                // The conversion to unsigned char is required: char may be
                // signed and negative values must not be passed to
                // std::isspace.
                return std::isspace(static_cast<unsigned char>(c)) != 0;
            }

            // Returns true if the character c is punctuation.
            static bool ispunct(char_type c) noexcept
            {
                // see isspace above for why the cast is needed
                return std::ispunct(static_cast<unsigned char>(c)) != 0;
            }
        };

        // In case there is no cwctype header, we implement the checks manually.
        // We make use of the fact that the tested categories should fit in
        // ASCII.
        template <typename Traits>
        struct traits_extension : public Traits
        {
            using char_type = typename Traits::char_type;

            static bool isspace(char_type c) noexcept
            {
                return traits_extension_details<Traits,
                    sizeof(char_type)>::isspace(c);
            }

            static bool ispunct(char_type c) noexcept
            {
                return traits_extension_details<Traits,
                    sizeof(char_type)>::ispunct(c);
            }
        };

        // assign_or_plus_equal bundles the three operations the separators
        // use to build up a token: assign(), plus_equal(), and clear(). Which
        // of those actually do something depends on the iterator category.
        //
        // This is the generic case (anything but a pure input iterator): the
        // token is produced with one single assign() from an iterator range,
        // consequently plus_equal() and clear() are no-ops.
        //
        // For pure input iterators (see the specialization for
        // std::input_iterator_tag) the roles are reversed: assign() is a
        // no-op, plus_equal() appends using operator+=, and clear() resets
        // the token to a default constructed one.
        template <typename IteratorTag>
        struct assign_or_plus_equal
        {
            // Replace the content of the token with the range [first, last).
            template <typename Iterator, typename Token>
            static constexpr void assign(
                Iterator first, Iterator last, Token& token)
            {
                token.assign(first, last);
            }

            // Nothing to append, the token is produced by assign().
            template <typename Token, typename Value>
            static constexpr void plus_equal(Token&, Value&&) noexcept
            {
            }

            // Nothing to clear, assign() overwrites the token anyway.
            template <typename Token>
            static constexpr void clear(Token&) noexcept
            {
            }
        };

        // Variant used for pure input iterators. Such a range can be
        // traversed only once, thus the token is built character by
        // character: assign() does nothing, plus_equal() appends to the
        // token, and clear() resets the token before a new one is started.
        template <>
        struct assign_or_plus_equal<std::input_iterator_tag>
        {
            // Intentionally empty, the token was built using plus_equal().
            template <typename Iterator, typename Token>
            static constexpr void assign(Iterator, Iterator, Token&) noexcept
            {
            }

            // Append the given value to the token.
            template <typename Token, typename Value>
            static constexpr void plus_equal(Token& token, Value&& value)
            {
                token += HPX_FORWARD(Value, value);
            }

            // Reset the token to a default constructed instance.
            template <typename Token>
            static constexpr void clear(Token& token)
            {
                token = Token();
            }
        };

        // Extracts the iterator category from the nested typedef of a class
        // type iterator.
        template <typename Iterator>
        struct class_iterator_category
        {
            using type = typename Iterator::iterator_category;
        };

        // Raw pointers do not expose any nested typedefs, they always are
        // random access iterators.
        template <typename T>
        struct class_iterator_category<T*>
        {
            using type = std::random_access_iterator_tag;
        };

        // Yields the iterator category tag for the given iterator type (raw
        // pointers included).
        //
        // Note: the selection has to be done through a specialization as
        // something like std::conditional_t<is_pointer, tag, nested-typedef>
        // would evaluate both alternatives eagerly, which is a hard error
        // for pointers as those have no nested iterator_category.
        template <typename Iterator>
        struct get_iterator_category
        {
            using iterator_category =
                typename class_iterator_category<Iterator>::type;
        };
    }    // namespace detail

    //===========================================================================
    // The offset_separator class, which is a model of TokenizerFunction.
    //
    // offset_separator breaks a sequence into tokens based on a list of
    // token lengths (offsets). E.g. the offsets {2, 2, 4} split "12252001"
    // into "12", "25", and "2001".
    //
    //  wrap_offsets:        if true, the list of offsets is reused from its
    //                       beginning once all offsets have been consumed,
    //                       otherwise tokenization stops at that point.
    //  return_partial_last: if true, a last token that is shorter than
    //                       requested by the current offset (because the
    //                       input has ended) is still returned, otherwise it
    //                       is dropped.
    class offset_separator
    {
    private:
        std::vector<int> offsets_;
        unsigned int current_offset_ = 0;    // index of the next offset to use
        bool wrap_offsets_ = true;
        bool return_partial_last_ = true;

    public:
        // Construct from a range of offsets.
        template <typename Iter>
        offset_separator(Iter begin, Iter end, bool wrap_offsets = true,
            bool return_partial_last = true)
          : offsets_(begin, end)
          , wrap_offsets_(wrap_offsets)
          , return_partial_last_(return_partial_last)
        {
        }

        // Construct from a list of offsets.
        offset_separator(std::initializer_list<int> init,
            bool wrap_offsets = true, bool return_partial_last = true)
          : offsets_(init)
          , wrap_offsets_(wrap_offsets)
          , return_partial_last_(return_partial_last)
        {
        }

        // By default the input is split into tokens of length one.
        offset_separator()
          : offsets_(1, 1)
        {
        }

        // Start over with the first offset.
        void reset() noexcept
        {
            current_offset_ = 0;
        }

        // Extracts the next token from [next, end) into 'tok' and advances
        // 'next' past the consumed elements. Returns false if no (more)
        // tokens are available.
        //
        // The list of offsets must not be empty.
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            using assigner = detail::assign_or_plus_equal<typename detail::
                    get_iterator_category<InputIterator>::iterator_category>;

            HPX_ASSERT(!offsets_.empty());

            assigner::clear(tok);
            InputIterator start(next);

            if (next == end)
            {
                return false;
            }

            // all offsets were used: either start over or stop
            if (current_offset_ == offsets_.size())
            {
                if (wrap_offsets_)
                {
                    current_offset_ = 0;
                }
                else
                {
                    return false;
                }
            }

            // consume up to 'c' elements, stop early if the input ends
            int c = offsets_[current_offset_];
            int i = 0;
            for (; i < c; ++i)
            {
                if (next == end)
                {
                    break;
                }
                assigner::plus_equal(tok, *next++);
            }
            assigner::assign(start, next, tok);

            // Fewer than 'c' elements were available, i.e. this token is a
            // partial one. Note: this has to compare against 'c' (not
            // 'c - 1'), otherwise a token that is exactly one element short
            // would still be returned.
            if (!return_partial_last_ && i < c)
            {
                return false;
            }

            ++current_offset_;
            return true;
        }
    };

    //=========================================================================
    // The char_separator class breaks a sequence of characters into tokens
    // based on the character delimiters (very much like bad old strtok). A
    // delimiter character can either be kept or dropped. A kept delimiter shows
    // up as an output token, whereas a dropped delimiter does not.

    // This class replaces the char_delimiters_separator class. The constructor
    // for the char_delimiters_separator class was too confusing and needed to
    // be deprecated. However, because of the default arguments to the
    // constructor, adding the new constructor would cause ambiguity, so instead
    // I deprecated the whole class. The implementation of the class was also
    // simplified considerably.
    // Selects how char_separator handles empty tokens, i.e. tokens that
    // would result from adjacent dropped delimiters or from a dropped
    // delimiter at the very beginning or end of the input.
    enum class empty_token_policy
    {
        // runs of dropped delimiters are skipped, no empty tokens are
        // produced (this is the default used by char_separator)
        drop,
        // empty tokens are reported to the caller
        keep
    };

    // char_separator is a model of TokenizerFunction that splits a sequence
    // of characters at delimiter characters.
    //
    //  dropped_delims: delimiters that separate tokens but never show up in
    //                  the output (nullptr is treated like an empty string)
    //  kept_delims:    delimiters that separate tokens and are additionally
    //                  returned as tokens of length one (may be nullptr)
    //  empty_tokens:   whether empty tokens are dropped or returned
    //
    // A default constructed char_separator uses isspace() to detect dropped
    // delimiters and ispunct() to detect kept delimiters.
    template <typename Char,
        typename Tr = typename std::basic_string<Char>::traits_type>
    class char_separator
    {
        using Traits = detail::traits_extension<Tr>;
        using string_type = std::basic_string<Char, Tr>;

    public:
        explicit char_separator(Char const* dropped_delims,
            Char const* kept_delims = nullptr,
            empty_token_policy empty_tokens = empty_token_policy::drop)
          // constructing a string from a null pointer is undefined behavior,
          // thus null pointers are mapped to empty sets of delimiters
          : m_kept_delims(
                kept_delims ? string_type(kept_delims) : string_type())
          , m_dropped_delims(
                dropped_delims ? string_type(dropped_delims) : string_type())
          , m_use_ispunct(false)
          , m_use_isspace(false)
          , m_empty_tokens(empty_tokens)
        {
        }

        // use ispunct() for kept delimiters and isspace for dropped.
        char_separator() = default;

        // Prepare for tokenizing a new sequence. The flag tracking the
        // progress in empty_token_policy::keep mode has to be cleared as
        // well, otherwise a leading empty token of the new sequence could
        // get lost if the separator is being reused.
        void reset() noexcept
        {
            m_output_done = false;
        }

        // Extracts the next token from [next, end) into 'tok' and advances
        // 'next' accordingly. Returns false if no (more) tokens are
        // available.
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            using assigner = detail::assign_or_plus_equal<typename detail::
                    get_iterator_category<InputIterator>::iterator_category>;

            assigner::clear(tok);

            // skip past all dropped_delims
            if (m_empty_tokens == empty_token_policy::drop)
            {
                for (; next != end && is_dropped(*next); ++next)
                {
                }
            }

            InputIterator start(next);

            if (m_empty_tokens == empty_token_policy::drop)
            {
                if (next == end)
                    return false;

                // if we are on a kept_delims move past it and stop
                if (is_kept(*next))
                {
                    assigner::plus_equal(tok, *next);
                    ++next;
                }
                else
                {
                    // append all the non delim characters
                    for (; next != end && !is_dropped(*next) && !is_kept(*next);
                         ++next)
                    {
                        assigner::plus_equal(tok, *next);
                    }
                }
            }
            else
            {    // m_empty_tokens == empty_token_policy::keep

                // Handle empty token at the end
                if (next == end)
                {
                    if (m_output_done == false)
                    {
                        m_output_done = true;
                        assigner::assign(start, next, tok);
                        return true;
                    }
                    else
                    {
                        return false;
                    }
                }

                if (is_kept(*next))
                {
                    if (m_output_done == false)
                    {
                        // no token was produced since the last delimiter:
                        // report an empty token, the kept delimiter itself
                        // is returned by the next invocation
                        m_output_done = true;
                    }
                    else
                    {
                        // return the kept delimiter as a token of its own
                        assigner::plus_equal(tok, *next);
                        ++next;
                        m_output_done = false;
                    }
                }
                else if (m_output_done == false && is_dropped(*next))
                {
                    // report the empty token in front of the dropped
                    // delimiter, the delimiter itself is skipped by the next
                    // invocation
                    m_output_done = true;
                }
                else
                {
                    // step over the dropped delimiter that terminated the
                    // previous token
                    if (is_dropped(*next))
                    {
                        start = ++next;
                    }

                    // append all the non delim characters
                    for (; next != end && !is_dropped(*next) && !is_kept(*next);
                         ++next)
                    {
                        assigner::plus_equal(tok, *next);
                    }

                    m_output_done = true;
                }
            }

            assigner::assign(start, next, tok);
            return true;
        }

    private:
        string_type m_kept_delims;
        string_type m_dropped_delims;
        bool m_use_ispunct = true;
        bool m_use_isspace = true;
        empty_token_policy m_empty_tokens = empty_token_policy::drop;

        // used with empty_token_policy::keep only: true if a token was
        // produced since the last delimiter was consumed
        bool m_output_done = false;

        // Returns true if E is a delimiter that is returned as a token.
        bool is_kept(Char E) const
        {
            if (m_kept_delims.length())
                return m_kept_delims.find(E) != string_type::npos;
            else if (m_use_ispunct)
            {
                return Traits::ispunct(E) != 0;
            }
            else
            {
                return false;
            }
        }

        // Returns true if E is a delimiter that is not returned as a token.
        bool is_dropped(Char E) const
        {
            if (m_dropped_delims.length())
                return m_dropped_delims.find(E) != string_type::npos;
            else if (m_use_isspace)
            {
                return Traits::isspace(E) != 0;
            }
            else
            {
                return false;
            }
        }
    };

    //===========================================================================
    // The char_delimiters_separator class, which is a model of
    // TokenizerFunction. char_delimiters_separator breaks a string into tokens
    // based on character delimiters. There are 2 types of delimiters.
    // Returnable delimiters can be returned as tokens. These are often
    // punctuation. Nonreturnable delimiters cannot be returned as tokens. These
    // are often whitespace

    // Model of TokenizerFunction splitting a sequence of characters at
    // returnable and nonreturnable delimiters.
    //
    //  return_delims: if true, returnable delimiters are reported as tokens
    //                 of length one, otherwise they are skipped
    //  returnable:    the set of returnable delimiters, ispunct() is used if
    //                 this is a null pointer
    //  nonreturnable: the set of nonreturnable delimiters, isspace() is used
    //                 if this is a null pointer
    template <typename Char,
        typename Tr = typename std::basic_string<Char>::traits_type>
    class char_delimiters_separator
    {
    private:
        using Traits = detail::traits_extension<Tr>;
        using string_type = std::basic_string<Char, Tr>;

        string_type returnable_;
        string_type nonreturnable_;
        bool return_delims_;
        bool no_ispunct_;
        bool no_isspace_;

        // Returns true if E is a returnable delimiter.
        bool is_ret(Char E) const noexcept
        {
            if (!returnable_.empty())
            {
                return returnable_.find(E) != string_type::npos;
            }

            // no explicit set of delimiters is available, fall back to
            // ispunct() unless this was disabled
            return !no_ispunct_ && Traits::ispunct(E);
        }

        // Returns true if E is a nonreturnable delimiter.
        bool is_nonret(Char E) const noexcept
        {
            if (!nonreturnable_.empty())
            {
                return nonreturnable_.find(E) != string_type::npos;
            }

            // no explicit set of delimiters is available, fall back to
            // isspace() unless this was disabled
            return !no_isspace_ && Traits::isspace(E);
        }

    public:
        explicit char_delimiters_separator(bool return_delims = false,
            Char const* returnable = nullptr,
            Char const* nonreturnable = nullptr)
          : returnable_(returnable ? string_type(returnable) : string_type())
          , nonreturnable_(
                nonreturnable ? string_type(nonreturnable) : string_type())
          , return_delims_(return_delims)
          , no_ispunct_(returnable != nullptr)
          , no_isspace_(nonreturnable != nullptr)
        {
        }

        // This tokenizer function holds no state that needs to be reset.
        void reset() noexcept {}

        // Extracts the next token from [next, end) into 'tok' and advances
        // 'next' past the consumed characters. Returns false if no (more)
        // tokens are available.
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            tok = Token();

            // Skip all leading nonreturnable delimiters, returnable
            // delimiters are skipped only if they are not to be reported.
            while (next != end &&
                (is_nonret(*next) || (is_ret(*next) && !return_delims_)))
            {
                ++next;
            }

            if (next == end)
            {
                return false;
            }

            // A returnable delimiter that has to be reported forms a token
            // of its own.
            if (is_ret(*next) && return_delims_)
            {
                tok += *next;
                ++next;
                return true;
            }

            // otherwise collect everything up to the next delimiter
            while (next != end && !is_nonret(*next) && !is_ret(*next))
            {
                tok += *next;
                ++next;
            }

            return true;
        }
    };
}    // namespace hpx::string_util

1	// Copyright (c) 2022 Hartmut Kaiser
2	//
3	// SPDX-License-Identifier: BSL-1.0
4	// Distributed under the Boost Software License, Version 1.0. (See accompanying
5	// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7	// Copyright John R. Bandela 2001.
8
9	// See http://www.boost.org/libs/tokenizer/ for documentation.
10
11	// Revision History:
12	// 01 Oct 2004 Joaquin M Lopez Munoz
13	// Workaround for a problem with string::assign in msvc-stlport
14	// 06 Apr 2004 John Bandela
15	// Fixed a bug involving using char_delimiter with a true input iterator
16	// 28 Nov 2003 Robert Zeh and John Bandela
17	// Converted into "fast" functions that avoid using += when
18	// the supplied iterator isn't an input_iterator; based on
19	// some work done at Archelon and a version that was checked into
20	// the boost CVS for a short period of time.
21	// 20 Feb 2002 John Maddock
22	// Removed using namespace std declarations and added
23	// workaround for BOOST_NO_STDC_NAMESPACE (the library
24	// can be safely mixed with regex).
25	// 06 Feb 2002 Jeremy Siek
26	// Added char_separator.
27	// 02 Feb 2002 Jeremy Siek
28	// Removed tabs and a little cleanup.
29
30	#pragma once
31
32	#include <hpx/config.hpp>
33	#include <hpx/assert.hpp>
34	#include <hpx/modules/errors.hpp>
35
36	#include <algorithm> // for find_if
37	#include <cctype>
38	#include <cwctype>
39	#include <initializer_list>
40	#include <iterator>
41	#include <stdexcept>
42	#include <string>
43	#include <vector>
44
45	namespace hpx::string_util {
46
47	//=========================================================================
48	// The escaped_list_separator class. Which is a model of TokenizerFunction
49	// An escaped list is a super-set of what is commonly known as a comma
50	// separated value (csv) list.It is separated into fields by a comma or
51	// other character. If the delimiting character is inside quotes, then it is
52	// counted as a regular character.To allow for embedded quotes in a field,
53	// there can be escape sequences using the \ much like C. The role of the
54	// comma, the quotation mark, and the escape character (backslash \), can be
55	// assigned to other characters.
56	template <typename Char,
57	typename Traits = typename std::basic_string<Char>::traits_type>
58	class escaped_list_separator	13,888✔
59	{
60	private:
61	using string_type = std::basic_string<Char, Traits>;
62
63	struct char_eq
64	{
65	Char e_;
66
67	explicit char_eq(Char e) noexcept	736,363✔
68	: e_(e)	736,363✔
69	{
70	}	736,363✔
71
72	bool operator()(Char c) noexcept	1,218,027✔
73	{
74	return Traits::eq(e_, c);	1,218,027✔
75	}
76	};
77
78	string_type escape_;
79	string_type c_;
80	string_type quote_;
81	bool last_ = false;	1,980✔
82
83	bool is_escape(Char e)	247,656✔
84	{
85	char_eq f(e);	247,656✔
86	return std::find_if(escape_.begin(), escape_.end(), f) !=	495,312✔
87	escape_.end();	247,656✔
88	}
89
90	bool is_c(Char e)	247,642✔
91	{
92	char_eq f(e);	247,642✔
93	return std::find_if(c_.begin(), c_.end(), f) != c_.end();	247,642✔
94	}
95
96	bool is_quote(Char e)	241,065✔
97	{
98	char_eq f(e);	241,065✔
99	return std::find_if(quote_.begin(), quote_.end(), f) !=	482,130✔
100	quote_.end();	241,065✔
101	}
102
103	template <typename iterator, typename Token>
104	void do_escape(iterator& next, iterator end, Token& tok)	17✔
105	{
106	if (++next == end)	17✔
107	{
108	HPX_THROW_EXCEPTION(invalid_status,	×
109	"escaped_list_separator::do_escape",
110	"cannot end with escape");
111	}
112
113	if (Traits::eq(*next, 'n'))	17✔
114	{
115	tok += '\n';	×
116	return;	×
117	}
118	else if (is_quote(*next) || is_c(*next) || is_escape(*next))	17✔
119	{
120	tok += *next;	17✔
121	return;	17✔
122	}
123	else
124	{
125	HPX_THROW_EXCEPTION(invalid_status,	×
126	"escaped_list_separator::do_escape",
127	"unknown escape sequence");
128	}
129	}	17✔
130
131	public:
132	explicit escaped_list_separator(	4✔
133	Char e = '\\', Char c = ',', Char q = '\"')
134	: escape_(1, e)	4✔
135	, c_(1, c)	4✔
136	, quote_(1, q)	4✔
137	{
138	}	4✔
139
140	escaped_list_separator(	1,976✔
141	string_type e, string_type c, string_type q) noexcept
142	: escape_(HPX_MOVE(e))	1,976✔
143	, c_(HPX_MOVE(c))	1,976✔
144	, quote_(HPX_MOVE(q))	1,976✔
145	{
146	}	1,976✔
147
148	void reset() noexcept	3,957✔
149	{
150	last_ = false;	3,957✔
151	}	3,957✔
152
153	template <typename InputIterator, typename Token>
154	bool operator()(InputIterator& next, InputIterator end, Token& tok)	10,537✔
155	{
156	bool bInQuote = false;	10,537✔
157	tok = Token();	10,537✔
158
159	if (next == end)	10,537✔
160	{
161	if (last_)	2,010✔
162	{
163	last_ = false;	30✔
164	return true;	30✔
165	}
166	else
167	{
168	return false;	1,980✔
169	}
170	}
171
172	last_ = false;	8,527✔
173	for (; next != end; ++next)	249,602✔
174	{
175	if (is_escape(*next))	247,652✔
176	{
177	do_escape(next, end, tok);	17✔
178	}	17✔
179	else if (is_c(*next))	247,635✔
180	{
181	if (!bInQuote)	6,587✔
182	{
183	// If we are not in quote, then we are done
184	++next;	6,577✔
185
186	// The last character was a c, that means there is 1
187	// more blank field
188	last_ = true;	6,577✔
189	return true;	6,577✔
190	}
191	else
192	{
193	tok += *next;	10✔
194	}
195	}	10✔
196	else if (is_quote(*next))	241,048✔
197	{
198	bInQuote = !bInQuote;	20✔
199	}	20✔
200	else
201	{
202	tok += *next;	241,028✔
203	}
204	}	241,075✔
205	return true;	1,950✔
206	}	10,537✔
207	};
208
209	//=========================================================================
210	// The classes here are used by offset_separator and char_separator to
211	// implement faster assigning of tokens using assign instead of +=
212
213	namespace detail {
214
215	//=====================================================================
216	// Tokenizer was broken for wide character separators, at least on
217	// Windows, since CRT functions isspace etc only expect values in [0,
218	// 0xFF]. Debug build asserts if higher values are passed in. The traits
219	// extension class should take care of this. Assuming that the
220	// conditional will always get optimized out in the function
221	// implementations, argument types are not a problem since both forms of
222	// character classifiers expect an int.
223	template <typename Traits, int N>
224	struct traits_extension_details : public Traits
225	{
226	using char_type = typename Traits::char_type;
227
228	static bool isspace(char_type c) noexcept
229	{
230	return std::iswspace(c) != 0;
231	}
232
233	static bool ispunct(char_type c) noexcept
234	{
235	return std::iswpunct(c) != 0;
236	}
237	};
238
239	template <typename Traits>
240	struct traits_extension_details<Traits, 1> : public Traits
241	{
242	using char_type = typename Traits::char_type;
243
244	static bool isspace(char_type c) noexcept	98✔
245	{
246	return std::isspace(c) != 0;	98✔
247	}
248
249	static bool ispunct(char_type c) noexcept	92✔
250	{
251	return std::ispunct(c) != 0;	92✔
252	}
253	};
254
255	// In case there is no cwctype header, we implement the checks manually.
256	// We make use of the fact that the tested categories should fit in
257	// ASCII.
258	template <typename Traits>
259	struct traits_extension : public Traits
260	{
261	using char_type = typename Traits::char_type;
262
263	static bool isspace(char_type c) noexcept	98✔
264	{
265	return traits_extension_details<Traits,	98✔
266	sizeof(char_type)>::isspace(c);	98✔
267	}
268
269	static bool ispunct(char_type c) noexcept	92✔
270	{
271	return traits_extension_details<Traits,	92✔
272	sizeof(char_type)>::ispunct(c);	92✔
273	}
274	};
275
// assign_or_plus_equal bundles the three operations a tokenizer function
// uses to build a token (assign, +=, clear) and picks their behavior from
// the iterator category tag.
//
// This primary template covers every category that has no dedicated
// specialization: the token is produced by one assign() over the consumed
// range, so plus_equal() and clear() are deliberately empty.
//
// For input iterators the roles are reversed: assign() does nothing,
// plus_equal() appends through operator+=, and clear() resets the token to
// a default constructed one.
template <typename IteratorTag>
struct assign_or_plus_equal
{
    // Replace the contents of token with the range [first, last).
    template <typename Iterator, typename Token>
    static constexpr void assign(Iterator first, Iterator last, Token& token)
    {
        token.assign(first, last);
    }

    // Nothing to append, the token is built by assign().
    template <typename Token, typename Value>
    static constexpr void plus_equal(
        Token& /*token*/, Value&& /*value*/) noexcept
    {
    }

    // assign() overwrites the token anyway, so there is no need to clear it.
    template <typename Token>
    static constexpr void clear(Token& /*token*/) noexcept
    {
    }
};
305
306	template <>
307	struct assign_or_plus_equal<std::input_iterator_tag>
308	{
309	template <class Iterator, class Token>
310	static constexpr void assign(Iterator, Iterator, Token&) noexcept
311	{
312	}
313
314	template <class Token, class Value>
315	static constexpr void plus_equal(Token& t, Value&& v)
316	{
317	t += HPX_FORWARD(Value, v);
318	}
319
320	template <class Token>
321	static constexpr void clear(Token& t)
322	{
323	t = Token();
324	}
325	};
326
// Exposes the nested iterator_category of a class-type iterator as ::type.
// Instantiating this template for a type without such a nested type (for
// instance a raw pointer) is ill-formed.
template <typename Iterator>
struct class_iterator_category
{
    using type = typename Iterator::iterator_category;
};

// Determines the iterator category tag of Iterator:
//   - raw pointers are random access iterators,
//   - any other type has to provide a nested iterator_category.
//
// The selection is made between two wrapper types and ::type is extracted
// only from the selected one. Naming class_iterator_category<Iterator>::type
// directly as an argument of std::conditional_t would instantiate it for
// pointer types as well and fail to compile.
template <typename Iterator>
struct get_iterator_category
{
private:
    // Wrapper providing the category used for raw pointers.
    struct pointer_iterator_category
    {
        using type = std::random_access_iterator_tag;
    };

public:
    using iterator_category =
        typename std::conditional_t<std::is_pointer_v<Iterator>,
            pointer_iterator_category,
            class_iterator_category<Iterator>>::type;
};
343	} // namespace detail
344
345	//===========================================================================
346	// The offset_separator class, which is a model of TokenizerFunction. Offset
347	// breaks a string into tokens based on a range of offsets
348	class offset_separator	63✔
349	{
350	private:
351	std::vector<int> offsets_;
352	unsigned int current_offset_ = 0;	8✔
353	bool wrap_offsets_ = true;	3✔
354	bool return_partial_last_ = true;	3✔
355
356	public:
357	template <typename Iter>
358	offset_separator(Iter begin, Iter end, bool wrap_offsets = true,	4✔
359	bool return_partial_last = true)
360	: offsets_(begin, end)	4✔
361	, wrap_offsets_(wrap_offsets)	4✔
362	, return_partial_last_(return_partial_last)	4✔
363	{
364	}	4✔
365
366	offset_separator(std::initializer_list<int> init,	1✔
367	bool wrap_offsets = true, bool return_partial_last = true)
368	: offsets_(HPX_MOVE(init))	1✔
369	, wrap_offsets_(wrap_offsets)	1✔
370	, return_partial_last_(return_partial_last)	1✔
371	{
372	}	1✔
373
374	offset_separator()	3✔
375	: offsets_(1, 1)	3✔
376	{
377	}	3✔
378
379	void reset()	10✔
380	{
381	current_offset_ = 0;	10✔
382	}	10✔
383
384	template <typename InputIterator, typename Token>
385	bool operator()(InputIterator& next, InputIterator end, Token& tok)	20✔
386	{
387	using assigner = detail::assign_or_plus_equal<typename detail::
388	get_iterator_category<InputIterator>::iterator_category>;
389
390	HPX_ASSERT(!offsets_.empty());	20✔
391
392	assigner::clear(tok);	20✔
393	InputIterator start(next);	20✔
394
395	if (next == end)	20✔
396	{
397	return false;	4✔
398	}
399
400	if (current_offset_ == offsets_.size())	16✔
401	{
402	if (wrap_offsets_)	×
403	{
404	current_offset_ = 0;	×
405	}	×
406	else
407	{
408	return false;	×
409	}
410	}	×
411
412	int c = offsets_[current_offset_];	16✔
413	int i = 0;	16✔
414	for (; i < c; ++i)	55✔
415	{
416	if (next == end)	39✔
417	{
418	break;	×
419	}
420	assigner::plus_equal(tok, *next++);	39✔
421	}	39✔
422	assigner::assign(start, next, tok);	16✔
423
424	if (!return_partial_last_)	16✔
425	{
426	if (i < (c - 1))	×
427	{
428	return false;	×
429	}
430	}	×
431
432	++current_offset_;	16✔
433	return true;	16✔
434	}	20✔
435	};
436
437	//=========================================================================
438	// The char_separator class breaks a sequence of characters into tokens
439	// based on the character delimiters (very much like bad old strtok). A
440	// delimiter character can either be kept or dropped. A kept delimiter shows
441	// up as an output token, whereas a dropped delimiter does not.
442
443	// This class replaces the char_delimiters_separator class. The constructor
444	// for the char_delimiters_separator class was too confusing and needed to
445	// be deprecated. However, because of the default arguments to the
446	// constructor, adding the new constructor would cause ambiguity, so instead
447	// I deprecated the whole class. The implementation of the class was also
448	// simplified considerably.
// Selects how char_separator treats empty tokens.
enum class empty_token_policy
{
    drop,    // empty tokens are skipped
    keep     // empty tokens are returned like any other token
};
454
455	// The out of the box GCC 2.95 on cygwin does not have a char_traits class.
456	template <typename Char,
457	typename Tr = typename std::basic_string<Char>::traits_type>
458	class char_separator	425,588✔
459	{
460	using Traits = detail::traits_extension<Tr>;
461	using string_type = std::basic_string<Char, Tr>;
462
463	public:
464	explicit char_separator(Char const* dropped_delims,	44,518✔
465	Char const* kept_delims = nullptr,
466	empty_token_policy empty_tokens = empty_token_policy::drop)
467	: m_dropped_delims(dropped_delims)	44,518✔
468	, m_use_ispunct(false)	44,518✔
469	, m_use_isspace(false)	44,518✔
470	, m_empty_tokens(empty_tokens)	44,518✔
471	{
472	// Borland workaround
473	if (kept_delims)	44,518✔
474	m_kept_delims = kept_delims;	97✔
475	}	44,518✔
476
477	// use ispunct() for kept delimiters and isspace for dropped.
478	char_separator() = default;	2✔
479
480	void reset() noexcept {}	138,770✔
481
482	template <typename InputIterator, typename Token>
483	bool operator()(InputIterator& next, InputIterator end, Token& tok)	101,007✔
484	{
485	using assigner = detail::assign_or_plus_equal<typename detail::
486	get_iterator_category<InputIterator>::iterator_category>;
487
488	assigner::clear(tok);	101,007✔
489
490	// skip past all dropped_delims
491	if (m_empty_tokens == empty_token_policy::drop)	101,007✔
492	{
493	for (; next != end && is_dropped(*next); ++next)	126,314✔
494	{
495	}	25,525✔
496	}	100,789✔
497
498	InputIterator start(next);	101,007✔
499
500	if (m_empty_tokens == empty_token_policy::drop)	101,007✔
501	{
502	if (next == end)	100,789✔
503	return false;	33,985✔
504
505	// if we are on a kept_delims move past it and stop
506	if (is_kept(*next))	66,804✔
507	{
508	assigner::plus_equal(tok, *next);	2✔
509	++next;	2✔
510	}	2✔
511	else
512	{
513	// append all the non delim characters
514	for (; next != end && !is_dropped(next) && !is_kept(next);	847,436✔
515	++next)	780,634✔
516	{
517	assigner::plus_equal(tok, *next);	780,634✔
518	}	780,634✔
519	}
520	}	66,804✔
521	else
522	{ // m_empty_tokens == empty_token_policy::keep
523
524	// Handle empty token at the end
525	if (next == end)	218✔
526	{
527	if (m_output_done == false)	98✔
528	{
529	m_output_done = true;	1✔
530	assigner::assign(start, next, tok);	1✔
531	return true;	1✔
532	}
533	else
534	{
535	return false;	97✔
536	}
537	}
538
539	if (is_kept(*next))	120✔
540	{
541	if (m_output_done == false)	5✔
542	{
543	m_output_done = true;	1✔
544	}	1✔
545	else
546	{
547	assigner::plus_equal(tok, *next);	4✔
548	++next;	4✔
549	m_output_done = false;	4✔
550	}
551	}	5✔
552	else if (m_output_done == false && is_dropped(*next))	115✔
553	{
554	m_output_done = true;	2✔
555	}	2✔
556	else
557	{
558	if (is_dropped(*next))	113✔
559	{
560	start = ++next;	16✔
561	}	16✔
562
563	for (; next != end && !is_dropped(next) && !is_kept(next);	11,109✔
564	++next)	10,996✔
565	{
566	assigner::plus_equal(tok, *next);	10,996✔
567	}	10,996✔
568
569	m_output_done = true;	113✔
570	}
571	}
572
573	assigner::assign(start, next, tok);	66,924✔
574	return true;	66,924✔
575	}	101,007✔
576
577	private:
578	string_type m_kept_delims;
579	string_type m_dropped_delims;
580	bool m_use_ispunct = true;	2✔
581	bool m_use_isspace = true;	2✔
582	empty_token_policy m_empty_tokens = empty_token_policy::drop;	2✔
583	bool m_output_done = false;	44,520✔
584
585	bool is_kept(Char E) const	858,559✔
586	{
587	if (m_kept_delims.length())	858,559✔
588	return m_kept_delims.find(E) != string_type::npos;	40✔
589	else if (m_use_ispunct)	858,519✔
590	{
591	return Traits::ispunct(E) != 0;	34✔
592	}
593	else
594	{
595	return false;	858,485✔
596	}
597	}	858,559✔
598
599	bool is_dropped(Char E) const	907,458✔
600	{
601	if (m_dropped_delims.length())	907,458✔
602	return m_dropped_delims.find(E) != string_type::npos;	907,412✔
603	else if (m_use_isspace)	46✔
604	{
605	return Traits::isspace(E) != 0;	46✔
606	}
607	else
608	{
609	return false;	×
610	}
611	}	907,458✔
612	};
613
614	//===========================================================================
615	// The char_delimiters_separator class, which is a model of
616	// TokenizerFunction. char_delimiters_separator breaks a string into tokens
617	// based on character delimiters. There are 2 types of delimiters.
618	// Returnable delimiters can be returned as tokens. These are often
619	// punctuation. Nonreturnable delimiters cannot be returned as tokens. These
620	// are often whitespace
621
622	// The out of the box GCC 2.95 on cygwin does not have a char_traits class.
623	template <typename Char,
624	typename Tr = typename std::basic_string<Char>::traits_type>
625	class char_delimiters_separator	56✔
626	{
627	private:
628	using Traits = detail::traits_extension<Tr>;
629	using string_type = std::basic_string<Char, Tr>;
630
631	string_type returnable_;
632	string_type nonreturnable_;
633	bool return_delims_;
634	bool no_ispunct_;
635	bool no_isspace_;
636
637	bool is_ret(Char E) const noexcept	86✔
638	{
639	if (returnable_.length())	86✔
640	{
641	return returnable_.find(E) != string_type::npos;	28✔
642	}
643	else
644	{
645	if (no_ispunct_)	58✔
646	{
647	return false;	×
648	}
649	else
650	{
651	int r = Traits::ispunct(E);	58✔
652	return r != 0;	58✔
653	}
654	}
655	}	86✔
656
657	bool is_nonret(Char E) const noexcept	75✔
658	{
659	if (nonreturnable_.length())	75✔
660	{
661	return nonreturnable_.find(E) != string_type::npos;	×
662	}
663	else
664	{
665	if (no_isspace_)	75✔
666	{
667	return false;	23✔
668	}
669	else
670	{
671	int r = Traits::isspace(E);	52✔
672	return r != 0;	52✔
673	}
674	}
675	}	75✔
676
677	public:
678	explicit char_delimiters_separator(bool return_delims = false,	4✔
679	Char const* returnable = nullptr,
680	Char const* nonreturnable = nullptr)
681	: returnable_(returnable ? returnable : string_type().c_str())	4✔
682	, nonreturnable_(	8✔
683	nonreturnable ? nonreturnable : string_type().c_str())	4✔
684	, return_delims_(return_delims)	4✔
685	, no_ispunct_(returnable != nullptr)	4✔
686	, no_isspace_(nonreturnable != nullptr)	4✔
687	{
688	}	4✔
689
690	void reset() noexcept {}	5✔
691
692	public:
693	template <typename InputIterator, typename Token>
694	bool operator()(InputIterator& next, InputIterator end, Token& tok)	16✔
695	{
696	tok = Token();	16✔
697
698	// skip past all nonreturnable delims
699	// skip past the returnable only if we are not returning delims
700	for (; next != end &&	57✔
701	(is_nonret(next) \|\| (is_ret(next) && !return_delims_));	27✔
702	++next)	14✔
703	{
704	}	14✔
705
706	if (next == end)	16✔
707	{
708	return false;	3✔
709	}
710
711	// if we are to return delims and we are one a returnable one move
712	// past it and stop
713	if (is_ret(*next) && return_delims_)	13✔
714	{
715	tok += *next;	2✔
716	++next;	2✔
717	}	2✔
718	else
719	{
720	// append all the non delim characters
721	for (; next != end && !is_nonret(next) && !is_ret(next);	49✔
722	++next)	38✔
723	{
724	tok += *next;	38✔
725	}	38✔
726	}
727
728	return true;	13✔
729	}	16✔
730	};
731	} // namespace hpx::string_util

STEllAR-GROUP / hpx / #848

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous