• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

KazDragon / terminalpp / 24532440096

16 Apr 2026 08:30PM UTC coverage: 97.051% (-0.8%) from 97.867%
24532440096

Pull #319

github

web-flow
Merge 06f67937c into 0f39c0bd9
Pull Request #319: Support for UTF-8

162 of 181 new or added lines in 8 files covered. (89.5%)

20 existing lines in 5 files now uncovered.

1777 of 1831 relevant lines covered (97.05%)

235.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.24
/src/glyph.cpp
1
#include "terminalpp/glyph.hpp"
2

3
#include <format>
4

5
#include <iostream>
6
#include <deque>
7
#include <limits>
8
#include <map>
9
#include <mutex>
10

11
namespace terminalpp {
12

13
namespace {
14

15
constexpr byte spilled_utf8_marker = 0xFF;
16

17
std::uint32_t utf8_decode(bytes data) noexcept;
18
std::size_t inline_utf8_length(glyph const &gly) noexcept;
19
std::uint32_t decode_spilled_utf8_id(glyph const &gly) noexcept;
20

21
struct spilled_utf8_pool
22
{
23
    std::mutex mutex_;
24
    std::deque<byte_storage> storage_;
25
    std::map<byte_storage, std::uint32_t> lookup_;
26
};
27

28
// Returns the single pool instance, which is a function-local static and is
// therefore constructed on the first call.
spilled_utf8_pool &pool()
{
    static spilled_utf8_pool instance;
    return instance;
}
33

34
// Stores the given byte sequence in the shared pool and returns its 1-based
// id.  Interning bytes that are already pooled returns the id they received
// the first time.  Returns 0 if the pool has no ids left to give out.
std::uint32_t intern_utf8_bytes(byte const *text, std::size_t length) noexcept
{
    // An id is written into three bytes of a glyph, so the pool is capped
    // at the largest value that 24 bits can hold.
    auto const max_entries =
        std::size_t(std::numeric_limits<std::uint32_t>::max() >> 8);

    auto &shared = pool();
    std::lock_guard guard{shared.mutex_};

    byte_storage key;
    key.reserve(length);

    for (auto const *cursor = text; cursor != text + length; ++cursor)
    {
        key.push_back(*cursor);
    }

    auto const existing = shared.lookup_.find(key);

    if (existing != shared.lookup_.end())
    {
        return existing->second;
    }

    if (shared.storage_.size() >= max_entries)
    {
        return 0;
    }

    shared.storage_.push_back(key);

    // Ids are 1-based, so the new entry's id is the size after insertion.
    auto const id = static_cast<std::uint32_t>(shared.storage_.size());
    shared.lookup_.emplace(shared.storage_.back(), id);
    return id;
}
6✔
65

66
}  // namespace
67

68
// Printability of each of the 256 character values, indexed by that value.
// is_printable() consults this table for charset::dec and for every charset
// that has no table of its own.
static bool const is_printable_dec[] = {
    // clang-format off
    false, false, false, false, false, false, false, false,  // 0x00 - 0x07
    false, false, true,  false, false, false, false, false,  // 0x08 - 0x0F
    false, false, false, false, false, false, false, false,  // 0x10 - 0x17
    false, false, false, false, false, false, false, false,  // 0x18 - 0x1F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x20 - 0x27
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x28 - 0x2F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x30 - 0x37
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x38 - 0x3F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x40 - 0x47
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x48 - 0x4F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x50 - 0x57
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x58 - 0x5F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x60 - 0x67
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x68 - 0x6F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x70 - 0x77
    true,  true,  true,  true,  true,  true,  true,  false,  // 0x78 - 0x7F
    false, false, false, false, false, false, false, false,  // 0x80 - 0x87
    false, false, false, false, false, false, false, false,  // 0x88 - 0x8F
    false, false, false, false, false, false, false, false,  // 0x90 - 0x97
    false, false, false, false, false, false, false, false,  // 0x98 - 0x9F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xA0 - 0xA7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xA8 - 0xAF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xB0 - 0xB7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xB8 - 0xBF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xC0 - 0xC7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xC8 - 0xCF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xD0 - 0xD7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xD8 - 0xDF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xE0 - 0xE7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xE8 - 0xEF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xF0 - 0xF7
    true,  true,  true,  true,  true,  true,  true,  false,  // 0xF8 - 0xFF
    // clang-format on
};
104

105
// Printability of each of the 256 character values, indexed by that value.
// is_printable() consults this table for charset::uk.
static bool const is_printable_uk[] = {
    // clang-format off
    false, false, false, false, false, false, false, false,  // 0x00 - 0x07
    false, false, true,  false, false, false, false, false,  // 0x08 - 0x0F
    false, false, false, false, false, false, false, false,  // 0x10 - 0x17
    false, false, false, false, false, false, false, false,  // 0x18 - 0x1F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x20 - 0x27
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x28 - 0x2F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x30 - 0x37
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x38 - 0x3F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x40 - 0x47
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x48 - 0x4F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x50 - 0x57
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x58 - 0x5F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x60 - 0x67
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x68 - 0x6F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x70 - 0x77
    true,  true,  true,  true,  true,  true,  true,  false,  // 0x78 - 0x7F
    false, false, false, false, false, false, false, false,  // 0x80 - 0x87
    false, false, false, false, false, false, false, false,  // 0x88 - 0x8F
    false, false, false, false, false, false, false, false,  // 0x90 - 0x97
    false, false, false, false, false, false, false, false,  // 0x98 - 0x9F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xA0 - 0xA7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xA8 - 0xAF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xB0 - 0xB7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xB8 - 0xBF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xC0 - 0xC7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xC8 - 0xCF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xD0 - 0xD7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xD8 - 0xDF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xE0 - 0xE7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xE8 - 0xEF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xF0 - 0xF7
    true,  true,  true,  true,  true,  true,  true,  false,  // 0xF8 - 0xFF
    // clang-format on
};
141

142
// Printability of each of the 256 character values, indexed by that value.
// is_printable() consults this table for charset::us_ascii.  Nothing at or
// above 0x7F is marked printable here.
static bool const is_printable_us_ascii[] = {
    // clang-format off
    false, false, false, false, false, false, false, false,  // 0x00 - 0x07
    false, false, true,  false, false, false, false, false,  // 0x08 - 0x0F
    false, false, false, false, false, false, false, false,  // 0x10 - 0x17
    false, false, false, false, false, false, false, false,  // 0x18 - 0x1F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x20 - 0x27
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x28 - 0x2F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x30 - 0x37
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x38 - 0x3F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x40 - 0x47
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x48 - 0x4F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x50 - 0x57
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x58 - 0x5F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x60 - 0x67
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x68 - 0x6F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x70 - 0x77
    true,  true,  true,  true,  true,  true,  true,  false,  // 0x78 - 0x7F
    false, false, false, false, false, false, false, false,  // 0x80 - 0x87
    false, false, false, false, false, false, false, false,  // 0x88 - 0x8F
    false, false, false, false, false, false, false, false,  // 0x90 - 0x97
    false, false, false, false, false, false, false, false,  // 0x98 - 0x9F
    false, false, false, false, false, false, false, false,  // 0xA0 - 0xA7
    false, false, false, false, false, false, false, false,  // 0xA8 - 0xAF
    false, false, false, false, false, false, false, false,  // 0xB0 - 0xB7
    false, false, false, false, false, false, false, false,  // 0xB8 - 0xBF
    false, false, false, false, false, false, false, false,  // 0xC0 - 0xC7
    false, false, false, false, false, false, false, false,  // 0xC8 - 0xCF
    false, false, false, false, false, false, false, false,  // 0xD0 - 0xD7
    false, false, false, false, false, false, false, false,  // 0xD8 - 0xDF
    false, false, false, false, false, false, false, false,  // 0xE0 - 0xE7
    false, false, false, false, false, false, false, false,  // 0xE8 - 0xEF
    false, false, false, false, false, false, false, false,  // 0xF0 - 0xF7
    false, false, false, false, false, false, false, false,  // 0xF8 - 0xFF
    // clang-format on
};
178

179
// Printability of each of the 256 character values, indexed by that value.
// is_printable() consults this table for charset::sco.
static bool const is_printable_sco[] = {
    // clang-format off
    false, false, false, false, false, false, false, false,  // 0x00 - 0x07
    false, false, true,  false, false, false, false, false,  // 0x08 - 0x0F
    false, false, false, false, false, false, false, false,  // 0x10 - 0x17
    false, false, false, false, false, false, false, false,  // 0x18 - 0x1F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x20 - 0x27
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x28 - 0x2F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x30 - 0x37
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x38 - 0x3F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x40 - 0x47
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x48 - 0x4F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x50 - 0x57
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x58 - 0x5F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x60 - 0x67
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x68 - 0x6F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0x70 - 0x77
    true,  true,  true,  true,  true,  true,  true,  false,  // 0x78 - 0x7F
    false, false, false, false, false, false, false, false,  // 0x80 - 0x87
    false, false, false, false, false, false, false, false,  // 0x88 - 0x8F
    false, false, false, false, false, false, false, false,  // 0x90 - 0x97
    false, false, false, false, false, false, false, false,  // 0x98 - 0x9F
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xA0 - 0xA7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xA8 - 0xAF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xB0 - 0xB7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xB8 - 0xBF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xC0 - 0xC7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xC8 - 0xCF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xD0 - 0xD7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xD8 - 0xDF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xE0 - 0xE7
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xE8 - 0xEF
    true,  true,  true,  true,  true,  true,  true,  true,   // 0xF0 - 0xF7
    true,  true,  true,  true,  true,  true,  true,  false,  // 0xF8 - 0xFF
    // clang-format on
};
215

216
// ==========================================================================
217
// IS_PRINTABLE
218
// ==========================================================================
219
// Returns whether the glyph's character_ is marked printable in the table
// belonging to its character set.  Continuation glyphs are never printable.
bool is_printable(glyph const &gly) noexcept
{
    if (gly.charset_ == terminalpp::charset::continuation)
    {
        return false;
    }

    auto const code = gly.character_;

    switch (gly.charset_.value_)
    {
        case terminalpp::charset::uk:
            return is_printable_uk[code];

        case terminalpp::charset::us_ascii:
            return is_printable_us_ascii[code];

        case terminalpp::charset::sco:
            return is_printable_sco[code];

        // Character sets without a table of their own share the DEC table.
        case terminalpp::charset::dec:
        default:
            return is_printable_dec[code];
    }
}
251

252
// ==========================================================================
253
// DISPLAY_WIDTH
254
// ==========================================================================
255
// Returns the number of columns the glyph is treated as occupying:
// 0 for a continuation glyph, 2 for a UTF-8 glyph whose code point lies in
// one of the two ranges tested below, and 1 for everything else.
int display_width(glyph const &gly) noexcept
{
    if (gly.charset_ == terminalpp::charset::continuation)
    {
        return 0;
    }

    if (gly.charset_ != terminalpp::charset::utf8)
    {
        return 1;
    }

    auto const code_point = utf8_decode(utf8_bytes(gly));

    // U+4E00..U+9FFF is the CJK Unified Ideographs block.
    bool const in_cjk_ideographs =
        code_point >= 0x4E00 && code_point <= 0x9FFF;

    // U+1F300..U+1FAFF spans the emoji and pictograph blocks.
    bool const in_pictographs =
        code_point >= 0x1F300 && code_point <= 0x1FAFF;

    return (in_cjk_ideographs || in_pictographs) ? 2 : 1;
}
280

281
namespace {
282

283
// ==========================================================================
284
// OUTPUT_CHARSET_AND_CHARACTER
285
// ==========================================================================
286
// Writes a readable form of a single-byte glyph to the stream.  The output
// is prefixed with "<charset>:" when the glyph's character set differs from
// a default-constructed character_set.  Carriage return, newline and tab
// are written as escape sequences, printable characters as themselves, and
// anything else as a two-digit hexadecimal value.
std::ostream &output_charset_and_character(std::ostream &out, glyph const &gly)
{
    if (gly.charset_ != character_set())
    {
        out << gly.charset_ << ":";
    }

    if (gly.character_ == '\r')
    {
        return out << "\\r";
    }

    if (gly.character_ == '\n')
    {
        return out << "\\n";
    }

    if (gly.character_ == '\t')
    {
        return out << "\\t";
    }

    if (is_printable(gly))
    {
        return out << gly.character_;
    }

    return out << std::format("0x{:02X}", static_cast<int>(gly.character_));
}
313

314
// ==========================================================================
315
// UTF8 DECODING HELPERS
316
// ==========================================================================
317
// Returns the number of bytes in a sequence held directly in ucharacter_.
// The count is at least 1 and stops at the first zero byte after index 0,
// or at the end of the array if there is none.
std::size_t inline_utf8_length(glyph const &gly) noexcept
{
    auto const capacity = sizeof(gly.ucharacter_);
    std::size_t length = 1;

    while (length < capacity && gly.ucharacter_[length] != '\0')
    {
        ++length;
    }

    return length;
}
328

329
// Rebuilds a pool id from ucharacter_[1..3], with the least significant
// byte held in ucharacter_[1].
std::uint32_t decode_spilled_utf8_id(glyph const &gly) noexcept
{
    auto const low = std::uint32_t(gly.ucharacter_[1]);
    auto const middle = std::uint32_t(gly.ucharacter_[2]) << 8;  // NOLINT
    auto const high = std::uint32_t(gly.ucharacter_[3]) << 16;   // NOLINT

    return low | middle | high;
}
335

336
// Decodes the first UTF-8 encoded code point in data.  The lead byte picks
// the sequence length (one to four bytes).  Returns 0 if data is empty, if
// the lead byte matches none of the four patterns, or if data is shorter
// than the lead byte requires.  Continuation bytes are not validated.
std::uint32_t utf8_decode(bytes data) noexcept
{
    if (data.empty())
    {
        return 0;
    }

    auto const lead = data[0];

    // The low six bits of the byte at index, moved into position.
    auto const payload = [&data](std::size_t index, int shift) {
        return std::uint32_t((data[index] & 0b00111111) << shift);  // NOLINT
    };

    // 0xxxxxxx: a single byte that is its own code point.
    if ((lead & 0b10000000) == 0)
    {
        return lead;
    }

    // 110xxxxx 10xxxxxx
    if (data.size() >= 2 && (lead & 0b11100000) == 0b11000000)
    {
        return std::uint32_t((lead & 0b00011111) << 6)  // NOLINT
               | payload(1, 0);
    }

    // 1110xxxx 10xxxxxx 10xxxxxx
    if (data.size() >= 3 && (lead & 0b11110000) == 0b11100000)
    {
        return std::uint32_t((lead & 0b00001111) << 12)  // NOLINT
               | payload(1, 6) | payload(2, 0);
    }

    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (data.size() >= 4 && (lead & 0b11111000) == 0b11110000)
    {
        return std::uint32_t((lead & 0b00000111) << 18)  // NOLINT
               | payload(1, 12) | payload(2, 6) | payload(3, 0);
    }

    return 0;
}
382

383
}  // namespace
384

385
// ==========================================================================
386
// OPERATOR<<(STREAM, GLYPH)
387
// ==========================================================================
388
// Streams a readable form of the glyph.  Non-UTF-8 glyphs, and UTF-8 glyphs
// that hold a single byte no greater than 0x7F, are written by
// output_charset_and_character.  Any other UTF-8 glyph is written as
// "U+" followed by its decoded code point in hexadecimal.
std::ostream &operator<<(std::ostream &out, glyph const &gly)
{
    if (gly.charset_ != terminalpp::charset::utf8)
    {
        return output_charset_and_character(out, gly);
    }

    auto const encoded = utf8_bytes(gly);
    bool const is_single_ascii_byte = encoded.size() == 1 && encoded[0] <= 0x7F;

    if (is_single_ascii_byte)
    {
        return output_charset_and_character(out, gly);
    }

    return out << std::format("U+{:04X}", utf8_decode(encoded));
}
408

409
// Stores a UTF-8 byte sequence in this glyph.  A sequence that fits within
// ucharacter_ is copied in and the rest of the array is zero-filled.  A
// longer sequence is interned in the shared pool, and the glyph keeps the
// spill marker in ucharacter_[0] followed by the pool id, least significant
// byte first, in ucharacter_[1..3].
void glyph::assign_utf8_bytes(byte const *text, std::size_t length) noexcept
{
    if (length > sizeof(ucharacter_))
    {
        auto const id = intern_utf8_bytes(text, length);

        ucharacter_[0] = spilled_utf8_marker;
        ucharacter_[1] = static_cast<byte>(id & 0xFF);
        ucharacter_[2] = static_cast<byte>((id >> 8) & 0xFF);   // NOLINT
        ucharacter_[3] = static_cast<byte>((id >> 16) & 0xFF);  // NOLINT
        return;
    }

    std::size_t index = 0;

    for (; index < length; ++index)
    {
        ucharacter_[index] = text[index];
    }

    for (; index < sizeof(ucharacter_); ++index)
    {
        ucharacter_[index] = byte{0};
    }
}
428

429
// Returns a view of the UTF-8 bytes that the glyph represents.
//
// If ucharacter_[0] is not the spill marker, the view covers the bytes held
// in the glyph itself.  Otherwise the glyph holds a 1-based pool id and the
// view covers the pooled sequence with that id.
//
// An id that names no pooled sequence yields an empty view.  This covers
// id 0, which intern_utf8_bytes returns when the pool is full, and any id
// beyond the current pool size.  Indexing the deque with such an id would
// be undefined behaviour.
bytes utf8_bytes(glyph const &gly) noexcept
{
    if (gly.ucharacter_[0] != spilled_utf8_marker)
    {
        return {gly.ucharacter_, inline_utf8_length(gly)};
    }

    auto const id = decode_spilled_utf8_id(gly);

    auto &utf8_pool = pool();
    std::lock_guard lock{utf8_pool.mutex_};

    if (id == 0 || id > utf8_pool.storage_.size())
    {
        return {};
    }

    // The view is used after the lock is released.  The code in this file
    // only ever appends to storage_, and std::deque::push_back does not
    // invalidate references to existing elements.
    auto const &stored = utf8_pool.storage_[id - std::uint32_t{1U}];
    return {stored.data(), stored.size()};
}
442

443
}  // namespace terminalpp
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc