• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

STEllAR-GROUP / hpx / #882

31 Aug 2023 07:44PM UTC coverage: 41.798% (-44.7%) from 86.546%
#882

push

19442 of 46514 relevant lines covered (41.8%)

126375.38 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.62
/libs/core/string_util/include/hpx/string_util/token_functions.hpp
1
//  Copyright (c) 2022-2025 Hartmut Kaiser
2
//
3
//  SPDX-License-Identifier: BSL-1.0
4
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
5
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6

7
// Copyright John R. Bandela 2001.
8

9
// See http://www.boost.org/libs/tokenizer/ for documentation.
10

11
// Revision History:
12
// 01 Oct 2004   Joaquin M Lopez Munoz
13
//      Workaround for a problem with string::assign in msvc-stlport
14
// 06 Apr 2004   John Bandela
15
//      Fixed a bug involving using char_delimiter with a true input iterator
16
// 28 Nov 2003   Robert Zeh and John Bandela
17
//      Converted into "fast" functions that avoid using += when
18
//      the supplied iterator isn't an input_iterator; based on
19
//      some work done at Archelon and a version that was checked into
20
//      the boost CVS for a short period of time.
21
// 20 Feb 2002   John Maddock
22
//      Removed using namespace std declarations and added
23
//      workaround for BOOST_NO_STDC_NAMESPACE (the library
24
//      can be safely mixed with regex).
25
// 06 Feb 2002   Jeremy Siek
26
//      Added char_separator.
27
// 02 Feb 2002   Jeremy Siek
28
//      Removed tabs and a little cleanup.
29

30
#pragma once
31

32
#include <hpx/config.hpp>
33
#include <hpx/assert.hpp>
34
#include <hpx/modules/errors.hpp>
35

36
#include <algorithm>    // for find_if
37
#include <cctype>
38
#include <cstddef>
39
#include <cstdint>
40
#include <cwctype>
41
#include <initializer_list>
42
#include <iterator>
43
#include <stdexcept>
44
#include <string>
45
#include <vector>
46

47
namespace hpx::string_util {
48

49
    //=========================================================================
50
    // The escaped_list_separator class, which is a model of TokenizerFunction.
51
    // An escaped list is a super-set of what is commonly known as a comma
52
    // separated value (csv) list. It is separated into fields by a comma or
53
    // other character. If the delimiting character is inside quotes, then it is
54
    // counted as a regular character. To allow for embedded quotes in a field,
55
    // there can be escape sequences using the \ much like C. The role of the
56
    // comma, the quotation mark, and the escape character (backslash \), can be
57
    // assigned to other characters.
58
    // Tokenizer function splitting a character sequence into fields.
    //
    // Template parameters:
    //   Char      - character type of the sequence being tokenized
    //   Traits    - character traits used for every character comparison
    //   Allocator - allocator of the strings that store the special characters
    //
    // Behavior of operator():
    //   - a separator character found outside of quotes ends the current field
    //   - a quote character toggles the "in quote" state and is not copied into
    //     the token
    //   - an escape character must be followed by 'n' (yields '\n') or by a
    //     quote, separator, or escape character (yields that character); any
    //     other continuation, or an escape at the very end of the input, is
    //     reported through HPX_THROW_EXCEPTION with hpx::error::invalid_status
    HPX_CXX_EXPORT template <typename Char,
        typename Traits = typename std::basic_string<Char>::traits_type,
        typename Allocator = typename std::basic_string<Char>::allocator_type>
    class escaped_list_separator
    {
    private:
        using string_type = std::basic_string<Char, Traits, Allocator>;

        string_type escape_;    // characters introducing an escape sequence
        string_type c_;         // field separator characters
        string_type quote_;     // quote characters
        bool last_ = false;     // previous token was terminated by a separator

        // Returns true if 'e' is one of the escape characters.
        bool is_escape(Char e) const noexcept
        {
            return escape_.find(e) != string_type::npos;
        }

        // Returns true if 'e' is one of the field separator characters.
        bool is_c(Char e) const noexcept
        {
            return c_.find(e) != string_type::npos;
        }

        // Returns true if 'e' is one of the quote characters.
        bool is_quote(Char e) const noexcept
        {
            return quote_.find(e) != string_type::npos;
        }

        // Consumes the character following an escape character and appends
        // its translation to 'tok'. On entry 'next' refers to the escape
        // character; on normal return it refers to the escaped character.
        template <typename Iterator, typename Token>
        void do_escape(Iterator& next, Iterator end, Token& tok) const
        {
            if (++next == end)
            {
                HPX_THROW_EXCEPTION(hpx::error::invalid_status,
                    "escaped_list_separator::do_escape",
                    "cannot end with escape");
            }

            if (Traits::eq(*next, 'n'))
            {
                tok += '\n';
                return;
            }
            if (is_quote(*next) || is_c(*next) || is_escape(*next))
            {
                tok += *next;
                return;
            }

            HPX_THROW_EXCEPTION(hpx::error::invalid_status,
                "escaped_list_separator::do_escape", "unknown escape sequence");
        }

    public:
        // Creates a separator using a single escape character 'e', a single
        // field separator 'c', and a single quote character 'q'.
        explicit escaped_list_separator(
            Char e = '\\', Char c = ',', Char q = '\"')
          : escape_(1, e)
          , c_(1, c)
          , quote_(1, q)
        {
        }

        // Creates a separator where every character of 'e', 'c', and 'q' acts
        // as an escape, separator, or quote character respectively.
        escaped_list_separator(
            string_type e, string_type c, string_type q) noexcept
          : escape_(HPX_MOVE(e))
          , c_(HPX_MOVE(c))
          , quote_(HPX_MOVE(q))
        {
        }

        // Forgets that the previous token ended at a separator.
        void reset() noexcept
        {
            last_ = false;
        }

        // Extracts the next field from [next, end) into 'tok' and advances
        // 'next' past the consumed characters (including the terminating
        // separator, if any).
        //
        // Returns false once the input is exhausted and no trailing empty
        // field is pending; returns true otherwise. Reaching the end of the
        // input while still inside quotes is not diagnosed.
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            bool in_quote = false;
            tok = Token();

            if (next == end)
            {
                // A separator at the very end of the input leaves exactly one
                // more (empty) field to report.
                bool const pending_empty_field = last_;
                last_ = false;
                return pending_empty_field;
            }

            last_ = false;
            for (/**/; next != end; ++next)
            {
                if (is_escape(*next))
                {
                    do_escape(next, end, tok);
                }
                else if (is_c(*next))
                {
                    if (!in_quote)
                    {
                        // If we are not in quote, then we are done
                        ++next;

                        // The last character was a c, that means there is 1
                        // more blank field
                        last_ = true;
                        return true;
                    }

                    // A quoted separator is an ordinary character.
                    tok += *next;
                }
                else if (is_quote(*next))
                {
                    in_quote = !in_quote;
                }
                else
                {
                    tok += *next;
                }
            }
            return true;
        }
    };
208

209
    //=========================================================================
210
    // The classes here are used by offset_separator and char_separator to
211
    // implement faster assigning of tokens using assign instead of +=
212

213
    namespace detail {
214

215
        //=====================================================================
216
        // Tokenizer was broken for wide character separators, at least on
217
        // Windows, since CRT functions isspace etc only expect values in [0,
218
        // 0xFF]. Debug build asserts if higher values are passed in. The traits
219
        // extension class should take care of this. Assuming that the
220
        // conditional will always get optimized out in the function
221
        // implementations, argument types are not a problem since both forms of
222
        // character classifiers expect an int.
223
        // Primary template: adds isspace()/ispunct() to a character traits
        // class by forwarding to the wide-character classifiers. It is used
        // for every N other than 1 (a separate specialization handles N == 1).
        HPX_CXX_EXPORT template <typename Traits, int N>
        struct traits_extension_details : public Traits
        {
            using char_type = typename Traits::char_type;

            // True if std::iswspace reports 'c' as whitespace.
            static bool isspace(char_type c) noexcept
            {
                auto const classified = std::iswspace(c);
                return classified != 0;
            }

            // True if std::iswpunct reports 'c' as punctuation.
            static bool ispunct(char_type c) noexcept
            {
                auto const classified = std::iswpunct(c);
                return classified != 0;
            }
        };
238

239
        // Specialization for single-byte character types: forwards to the
        // narrow classifiers from <cctype>.
        //
        // std::isspace/std::ispunct have undefined behavior for arguments that
        // are neither EOF nor representable as unsigned char, which is the case
        // for negative values of a signed char_type. The character is therefore
        // converted to unsigned char before it is classified.
        HPX_CXX_EXPORT template <typename Traits>
        struct traits_extension_details<Traits, 1> : public Traits
        {
            using char_type = typename Traits::char_type;

            // True if std::isspace reports 'c' as whitespace.
            static bool isspace(char_type c) noexcept
            {
                return std::isspace(static_cast<unsigned char>(c)) != 0;
            }

            // True if std::ispunct reports 'c' as punctuation.
            static bool ispunct(char_type c) noexcept
            {
                return std::ispunct(static_cast<unsigned char>(c)) != 0;
            }
        };
×
254

255
        // In case there is no cwctype header, we implement the checks manually.
256
        // We make use of the fact that the tested categories should fit in
257
        // ASCII.
258
        // Extends a character traits class with isspace() and ispunct().
        // The implementation is picked from traits_extension_details based on
        // sizeof(char_type).
        HPX_CXX_EXPORT template <typename Traits>
        struct traits_extension : public Traits
        {
            using char_type = typename Traits::char_type;

            // True if 'c' is classified as whitespace.
            static bool isspace(char_type c) noexcept
            {
                using classifier =
                    traits_extension_details<Traits, sizeof(char_type)>;
                return classifier::isspace(c);
            }

            // True if 'c' is classified as punctuation.
            static bool ispunct(char_type c) noexcept
            {
                using classifier =
                    traits_extension_details<Traits, sizeof(char_type)>;
                return classifier::ispunct(c);
            }
        };
275

276
        // The assign_or_plus_equal struct contains functions that implement
277
        // assign, +=, and clearing based on the iterator type. The generic case
278
        // does nothing for plus_equal and clearing, while passing through the
279
        // call for assign.
280
        //
281
        // When an input iterator is being used, the situation is reversed. The
282
        // assign method does nothing, plus_equal invokes operator +=, and the
283
        // clearing method sets the supplied token to the default token
284
        // constructor's result.
285
        // Generic token-building policy: the token is produced in one step by
        // assign() from the iterator range, so appending single values and
        // clearing the token are both no-ops.
        HPX_CXX_EXPORT template <typename IteratorTag>
        struct assign_or_plus_equal
        {
            // Replaces the content of 'token' with the range [first, last).
            template <typename Iterator, typename Token>
            static constexpr void assign(
                Iterator first, Iterator last, Token& token)
            {
                token.assign(first, last);
            }

            // Intentionally empty: the token is filled by assign().
            template <typename Token, typename Value>
            static constexpr void plus_equal(Token&, Value&&) noexcept
            {
            }

            // Intentionally empty: assign() overwrites the token anyway.
            template <typename Token>
            static constexpr void clear(Token&) noexcept
            {
            }
        };
305

306
        template <>
307
        struct assign_or_plus_equal<std::input_iterator_tag>
308
        {
309
            template <class Iterator, class Token>
310
            static constexpr void assign(Iterator, Iterator, Token&) noexcept
311
            {
312
            }
313

314
            template <class Token, class Value>
315
            static constexpr void plus_equal(Token& t, Value&& v)
316
            {
317
                t += HPX_FORWARD(Value, v);
318
            }
319

320
            template <class Token>
321
            static constexpr void clear(Token& t)
322
            {
323
                t = Token();
324
            }
325
        };
326

327
        // Exposes the nested iterator_category of a class-type iterator as
        // the member alias 'type'.
        HPX_CXX_EXPORT template <typename Iter>
        struct class_iterator_category
        {
            using type = typename Iter::iterator_category;
        };
332

333
        // This portably gets the iterator_tag without partial template
334
        // specialization
335
        // Determines the iterator category of 'Iterator':
        //   - raw pointers are random access iterators
        //   - every other type provides it via class_iterator_category
        //
        // The category provider is selected first and '::type' is applied to
        // the selected provider only. Naming
        // class_iterator_category<Iterator>::type directly as an argument of
        // std::conditional_t would instantiate it for pointer types as well,
        // which is ill-formed since a pointer has no nested iterator_category.
        HPX_CXX_EXPORT template <typename Iterator>
        struct get_iterator_category
        {
        private:
            // Category provider used for raw pointers.
            struct pointer_iterator_category
            {
                using type = std::random_access_iterator_tag;
            };

        public:
            using iterator_category =
                typename std::conditional_t<std::is_pointer_v<Iterator>,
                    pointer_iterator_category,
                    class_iterator_category<Iterator>>::type;
        };
343
    }    // namespace detail
344

345
    //===========================================================================
346
    // The offset_separator class, which is a model of TokenizerFunction. Offset
347
    // breaks a string into tokens based on a range of offsets
348
    // Tokenizer function that cuts a sequence into tokens of fixed lengths.
    //
    // The lengths are taken from a list of offsets, one per token. Once the
    // list is exhausted it either starts over (wrap_offsets == true) or
    // tokenization stops. If the input ends before a token has reached its
    // full length, the shortened token is reported only if
    // return_partial_last == true.
    HPX_CXX_EXPORT class offset_separator
    {
    private:
        std::vector<int> offsets_;           // token lengths
        unsigned int current_offset_ = 0;    // index of the next length to use
        bool wrap_offsets_ = true;
        bool return_partial_last_ = true;

    public:
        // Takes the token lengths from the range [begin, end).
        template <typename Iter>
        offset_separator(Iter begin, Iter end, bool wrap_offsets = true,
            bool return_partial_last = true)
          : offsets_(begin, end)
          , wrap_offsets_(wrap_offsets)
          , return_partial_last_(return_partial_last)
        {
        }

        // Takes the token lengths from an initializer list.
        offset_separator(std::initializer_list<int> init,
            bool wrap_offsets = true, bool return_partial_last = true)
          : offsets_(init)
          , wrap_offsets_(wrap_offsets)
          , return_partial_last_(return_partial_last)
        {
        }

        // By default every token consists of a single character.
        offset_separator()
          : offsets_(1, 1)
        {
        }

        // Restarts with the first token length.
        void reset() noexcept
        {
            current_offset_ = 0;
        }

        // Extracts the next token from [next, end) into 'tok' and advances
        // 'next' past it.
        //
        // Returns false if the input is exhausted, if all offsets have been
        // used and wrapping is disabled, or if the token is shorter than
        // requested while partial tokens are disabled. Requires a non-empty
        // list of offsets (checked by HPX_ASSERT).
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            using assigner = detail::assign_or_plus_equal<typename detail::
                    get_iterator_category<InputIterator>::iterator_category>;

            HPX_ASSERT(!offsets_.empty());

            assigner::clear(tok);
            InputIterator start(next);

            if (next == end)
            {
                return false;
            }

            if (static_cast<std::size_t>(current_offset_) == offsets_.size())
            {
                if (!wrap_offsets_)
                {
                    return false;
                }
                current_offset_ = 0;
            }

            // Consume up to 'c' characters; 'i' counts how many were
            // actually available.
            int const c = offsets_[static_cast<std::size_t>(current_offset_)];
            int i = 0;
            for (; i < c; ++i)
            {
                if (next == end)
                {
                    break;
                }
                assigner::plus_equal(tok, *next++);
            }
            assigner::assign(start, next, tok);

            // Any token with fewer than 'c' characters is partial. (The
            // previous comparison against c - 1 let tokens that were exactly
            // one character short slip through.)
            if (!return_partial_last_ && i < c)
            {
                return false;
            }

            ++current_offset_;
            return true;
        }
    };
436

437
    //=========================================================================
438
    // The char_separator class breaks a sequence of characters into tokens
439
    // based on the character delimiters (very much like bad old strtok). A
440
    // delimiter character can either be kept or dropped. A kept delimiter shows
441
    // up as an output token, whereas a dropped delimiter does not.
442

443
    // This class replaces the char_delimiters_separator class. The constructor
444
    // for the char_delimiters_separator class was too confusing and needed to
445
    // be deprecated. However, because of the default arguments to the
446
    // constructor, adding the new constructor would cause ambiguity, so instead
447
    // I deprecated the whole class. The implementation of the class was also
448
    // simplified considerably.
449
    // Selects how char_separator treats empty tokens.
    HPX_CXX_EXPORT enum class empty_token_policy : std::uint8_t
    {
        drop,    // dropped delimiters are skipped, empty tokens are not reported
        keep     // empty tokens next to delimiters are reported
    };
450

451
    // Tokenizer function splitting a sequence at delimiter characters.
    //
    // Template parameters:
    //   Char      - character type of the sequence being tokenized
    //   Traits    - character traits of the delimiter strings
    //   Allocator - allocator of the delimiter strings
    //
    // Dropped delimiters separate tokens and never show up in the output;
    // kept delimiters separate tokens and are reported as tokens of their own.
    // A default-constructed separator uses isspace() for dropped and ispunct()
    // for kept delimiters.
    HPX_CXX_EXPORT template <typename Char,
        typename Traits = typename std::basic_string<Char>::traits_type,
        typename Allocator = typename std::basic_string<Char>::allocator_type>
    class char_separator
    {
        using traits_type = detail::traits_extension<Traits>;
        using string_type = std::basic_string<Char, Traits, Allocator>;

    public:
        // Creates a separator from explicit delimiter sets.
        //
        //   dropped_delims - null-terminated list of dropped delimiters; a
        //                    null pointer is treated like an empty list
        //   kept_delims    - null-terminated list of kept delimiters; a null
        //                    pointer is treated like an empty list
        //   empty_tokens   - whether empty tokens are reported
        //
        // The isspace()/ispunct() fallbacks are disabled for separators
        // created through this constructor.
        explicit char_separator(Char const* dropped_delims,
            Char const* kept_delims = nullptr,
            empty_token_policy empty_tokens = empty_token_policy::drop)
          : m_use_ispunct(false)
          , m_use_isspace(false)
          , m_empty_tokens(empty_tokens)
        {
            // Constructing a string from a null pointer is undefined, so both
            // lists are only assigned if they were actually supplied.
            if (dropped_delims)
                m_dropped_delims = dropped_delims;
            if (kept_delims)
                m_kept_delims = kept_delims;
        }

        // use ispunct() for kept delimiters and isspace for dropped.
        char_separator() = default;

        // Returns the separator to its initial parsing state so that it can
        // be used for a new sequence.
        void reset() noexcept
        {
            m_output_done = false;
        }

        // Extracts the next token from [next, end) into 'tok' and advances
        // 'next'. Returns false if no further token is available.
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            using assigner = detail::assign_or_plus_equal<typename detail::
                    get_iterator_category<InputIterator>::iterator_category>;

            assigner::clear(tok);

            // skip past all dropped_delims
            if (m_empty_tokens == empty_token_policy::drop)
            {
                for (/**/; next != end && is_dropped(*next); ++next)
                {
                }
            }

            InputIterator start(next);

            if (m_empty_tokens == empty_token_policy::drop)
            {
                if (next == end)
                    return false;

                // if we are on a kept_delims move past it and stop
                if (is_kept(*next))
                {
                    assigner::plus_equal(tok, *next);
                    ++next;
                }
                else
                {
                    // append all the non delim characters
                    for (/**/;
                        next != end && !is_dropped(*next) && !is_kept(*next);
                        ++next)
                    {
                        assigner::plus_equal(tok, *next);
                    }
                }
            }
            else
            {
                // m_empty_tokens == empty_token_policy::keep

                // Handle empty token at the end
                if (next == end)
                {
                    if (!m_output_done)
                    {
                        m_output_done = true;
                        assigner::assign(start, next, tok);
                        return true;
                    }
                    else
                    {
                        return false;
                    }
                }

                if (is_kept(*next))
                {
                    if (!m_output_done)
                    {
                        // Nothing was reported for the field in front of the
                        // kept delimiter yet: report an empty token first and
                        // leave the delimiter for the next call.
                        m_output_done = true;
                    }
                    else
                    {
                        // Report the kept delimiter itself.
                        assigner::plus_equal(tok, *next);
                        ++next;
                        m_output_done = false;
                    }
                }
                else if (!m_output_done && is_dropped(*next))
                {
                    // Empty field in front of a dropped delimiter; the
                    // delimiter is consumed by the next call.
                    m_output_done = true;
                }
                else
                {
                    // Step over the dropped delimiter that ended the
                    // previously reported field.
                    if (is_dropped(*next))
                    {
                        start = ++next;
                    }

                    for (/**/;
                        next != end && !is_dropped(*next) && !is_kept(*next);
                        ++next)
                    {
                        assigner::plus_equal(tok, *next);
                    }

                    m_output_done = true;
                }
            }

            assigner::assign(start, next, tok);
            return true;
        }

    private:
        string_type m_kept_delims;
        string_type m_dropped_delims;
        bool m_use_ispunct = true;
        bool m_use_isspace = true;
        empty_token_policy m_empty_tokens = empty_token_policy::drop;

        // Only used with empty_token_policy::keep: set once a token has been
        // reported for the current field, cleared after a kept delimiter has
        // been reported.
        bool m_output_done = false;

        // Returns true if 'E' is a kept delimiter.
        bool is_kept(Char E) const
        {
            if (m_kept_delims.length())
            {
                return m_kept_delims.find(E) != string_type::npos;
            }
            else if (m_use_ispunct)
            {
                return traits_type::ispunct(E);
            }
            return false;
        }

        // Returns true if 'E' is a dropped delimiter.
        bool is_dropped(Char E) const
        {
            if (m_dropped_delims.length())
            {
                return m_dropped_delims.find(E) != string_type::npos;
            }
            else if (m_use_isspace)
            {
                return traits_type::isspace(E);
            }
            return false;
        }
    };
×
609

610
    //===========================================================================
×
611
    // The char_delimiters_separator class, which is a model of
612
    // TokenizerFunction. char_delimiters_separator breaks a string into tokens
613
    // based on character delimiters. There are 2 types of delimiters.
614
    // Returnable delimiters can be returned as tokens. These are often
615
    // punctuation. Nonreturnable delimiters cannot be returned as tokens. These
616
    // are often whitespace
617

618
    // Tokenizer function splitting a sequence at two kinds of delimiters:
    // returnable delimiters, which may be reported as tokens of their own, and
    // nonreturnable delimiters, which are always skipped.
    HPX_CXX_EXPORT template <typename Char,
        typename Traits = typename std::basic_string<Char>::traits_type,
        typename Allocator = typename std::basic_string<Char>::allocator_type>
    class char_delimiters_separator
    {
    private:
        using traits_type = detail::traits_extension<Traits>;
        using string_type = std::basic_string<Char, Traits, Allocator>;

        string_type returnable_;
        string_type nonreturnable_;
        bool return_delims_;    // report returnable delimiters as tokens
        bool no_ispunct_;       // an explicit returnable list was supplied
        bool no_isspace_;       // an explicit nonreturnable list was supplied

        // Returns true if 'E' is a returnable delimiter. Without a non-empty
        // explicit list this falls back to ispunct(), unless a list was
        // supplied explicitly.
        bool is_ret(Char E) const noexcept
        {
            if (returnable_.length())
            {
                return returnable_.find(E) != string_type::npos;
            }
            if (no_ispunct_)
            {
                return false;
            }
            return traits_type::ispunct(E);
        }

        // Returns true if 'E' is a nonreturnable delimiter. Without a
        // non-empty explicit list this falls back to isspace(), unless a list
        // was supplied explicitly.
        bool is_nonret(Char E) const noexcept
        {
            if (nonreturnable_.length())
            {
                return nonreturnable_.find(E) != string_type::npos;
            }
            if (no_isspace_)
            {
                return false;
            }
            return traits_type::isspace(E);
        }

    public:
        // Creates a separator.
        //
        //   return_delims - report returnable delimiters as tokens
        //   returnable    - null-terminated list of returnable delimiters, or
        //                   a null pointer to classify with ispunct()
        //   nonreturnable - null-terminated list of nonreturnable delimiters,
        //                   or a null pointer to classify with isspace()
        explicit char_delimiters_separator(bool return_delims = false,
            Char const* returnable = nullptr,
            Char const* nonreturnable = nullptr)
          : returnable_(
                returnable != nullptr ? string_type(returnable) : string_type())
          , nonreturnable_(nonreturnable != nullptr ?
                    string_type(nonreturnable) :
                    string_type())
          , return_delims_(return_delims)
          , no_ispunct_(returnable != nullptr)
          , no_isspace_(nonreturnable != nullptr)
        {
        }

        // This separator keeps no parsing state between calls.
        static constexpr void reset() noexcept {}

        // Extracts the next token from [next, end) into 'tok' and advances
        // 'next' past it. Returns false if only delimiters that are not
        // reported (or nothing at all) remained.
        template <typename InputIterator, typename Token>
        bool operator()(InputIterator& next, InputIterator end, Token& tok)
        {
            tok = Token();

            // Skip all nonreturnable delimiters; returnable ones are skipped
            // as well unless they have to be reported.
            while (next != end &&
                (is_nonret(*next) || (is_ret(*next) && !return_delims_)))
            {
                ++next;
            }

            if (next == end)
            {
                return false;
            }

            // If delimiters are reported and we are on a returnable one, it
            // forms a token on its own.
            if (is_ret(*next) && return_delims_)
            {
                tok += *next;
                ++next;
                return true;
            }

            // Otherwise collect everything up to the next delimiter.
            while (next != end && !is_nonret(*next) && !is_ret(*next))
            {
                tok += *next;
                ++next;
            }

            return true;
        }
    };
727
}    // namespace hpx::string_util
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc