• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ascii-boxes / boxes / 7314850954

24 Dec 2023 01:12PM UTC coverage: 88.826% (+2.5%) from 86.336%
7314850954

push

github

tsjensen
Use -ggdb3 option for more detailed debug info

2869 of 3408 branches covered (0.0%)

Branch coverage included in aggregate %.

4619 of 5022 relevant lines covered (91.98%)

202175.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.21
/src/unicode.c
1
/*
2
 * boxes - Command line filter to draw/remove ASCII boxes around text
3
 * Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
4
 *
5
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
6
 * License, version 3, as published by the Free Software Foundation.
7
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
8
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
9
 * details.
10
 * You should have received a copy of the GNU General Public License along with this program.
11
 * If not, see <https://www.gnu.org/licenses/>.
12
 *
13
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
14
 */
15

16
/*
17
 * Functions and constants for handling unicode strings with libunistring.
18
 */
19

20
#include "config.h"
21
#include <errno.h>
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25

26
#include <uniconv.h>
27
#include <unictype.h>
28
#include <unistr.h>
29

30
#include "boxes.h"
31
#include "tools.h"
32
#include "unicode.h"
33

34

35

36
/* effective character encoding of input and output text */
37
const char *encoding;
38

39
/* ucs4_t character '\t' (tab)  */
40
const ucs4_t char_tab = 0x00000009;
41

42
/* ucs4_t character ' '  (space) */
43
const ucs4_t char_space = 0x00000020;
44

45
/* ucs4_t character '\r' (carriage return) */
46
const ucs4_t char_cr = 0x0000000d;
47

48
/* ucs4_t character '\n' (newline) */
49
const ucs4_t char_newline = 0x0000000a;
50

51
/* ucs4_t character 0x1b (escape)  */
52
const ucs4_t char_esc = 0x0000001b;
53

54
/* ucs4_t character '\0' (zero) */
55
const ucs4_t char_nul = 0x00000000;
56

57

58

59
/**
 * Check whether the character at position `idx` of `text` equals `expected_char`.
 *
 * @param text the UTF-32 string to look into, may be NULL
 * @param idx index of the character to inspect (not range-checked here)
 * @param expected_char the character to compare against
 * @return 1 if `text` is not NULL and the character matches, 0 otherwise
 */
int is_char_at(const uint32_t *text, const size_t idx, const ucs4_t expected_char)
{
    if (text == NULL) {
        return 0;
    }
    return text[idx] == expected_char ? 1 : 0;
}
63

64

65

66
/**
 * Store `char_to_set` at position `idx` of `text`.
 *
 * @param text the UTF-32 buffer to modify (not checked for NULL)
 * @param idx index of the character to overwrite (not range-checked here)
 * @param char_to_set the character to store
 */
void set_char_at(uint32_t *text, const size_t idx, const ucs4_t char_to_set)
{
    uint32_t *target = text + idx;

    /* Delegate the store of exactly one character to libunistring. */
    u32_set(target, char_to_set, 1);
}
388,460✔
70

71

72

73
int is_empty(const uint32_t *text)
19,321,505✔
74
{
75
    return text == NULL || is_char_at(text, 0, char_nul);
19,321,505✔
76
}
77

78

79

80
int u32_is_blank(const uint32_t *text)
112✔
81
{
82
    if (is_empty(text)) {
112✔
83
        return 1;
2✔
84
    }
85

86
    for (const uint32_t *c = text; *c != char_nul; c++) {
594✔
87
        if (!is_blank(*c)) {
484✔
88
            return 0;
×
89
        }
90
    }
242✔
91
    return 1;
110✔
92
}
56✔
93

94

95

96
/**
 * Tell whether `c` lies in the printable ASCII range 0x20 (space) to 0x7e ('~').
 *
 * @param c the character to check
 * @return 1 if `c` is in that range, 0 otherwise
 */
int is_ascii_printable(const ucs4_t c)
{
    if (c < 0x20) {
        return 0;
    }
    return c <= 0x7e ? 1 : 0;
}
100

101

102

103
int is_allowed_anywhere(const ucs4_t c)
20,159,629✔
104
{
105
    /* ESC, CR, LF, and TAB are control characters */
106
    return !uc_is_cntrl(c) || c == char_tab || c == char_cr || c == char_newline || c == char_esc;
20,159,629✔
107
}
108

109

110

111
int is_allowed_in_shape(const ucs4_t c)
218,998✔
112
{
113
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline;
218,998✔
114
}
115

116

117

118
/**
 * Tell whether `c` may appear in a sample. Samples have no restrictions
 * beyond those of is_allowed_anywhere().
 *
 * @param c the character to check
 * @return the result of is_allowed_anywhere() for `c`
 */
int is_allowed_in_sample(const ucs4_t c)
{
    const int allowed = is_allowed_anywhere(c);
    return allowed;
}
122

123

124

125
int is_allowed_in_filename(const ucs4_t c)
492✔
126
{
127
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
492✔
128
}
129

130

131

132
int is_allowed_in_kv_string(const ucs4_t c)
143,942✔
133
{
134
    return is_allowed_anywhere(c) && c != char_cr && c != char_newline && c != char_esc;
143,942✔
135
}
136

137

138

139
int is_blank(const ucs4_t c)
38,362,014✔
140
{
141
    return c == char_tab || uc_is_blank(c);
38,362,014✔
142
}
143

144

145

146
/**
 * Allocate a new, empty UTF-32 string, consisting only of a zero terminator.
 *
 * @return a freshly allocated string which the caller must free(), or NULL
 *         if the allocation failed
 */
uint32_t *new_empty_string32(void)
{
    /* calloc() zero-fills, so the single element already is the terminator. */
    uint32_t *empty = calloc(1, sizeof *empty);
    return empty;
}
150

151

152

153
ucs4_t to_utf32(char ascii)
6,070✔
154
{
155
    ucs4_t c = char_nul;
6,070✔
156
    if (ascii >= 0x20 && ascii < 0x7f) {
6,070✔
157
        char *bytes = (char *) (&c);
6,068✔
158
        bytes[0] = ascii;
6,068✔
159
    }
3,034✔
160
    return c;
6,070✔
161
}
162

163

164

165
uint32_t *advance_next32(const uint32_t *s, size_t *invis)
19,282,233✔
166
{
167
    if (is_empty(s)) {
19,282,233!
168
        return (uint32_t *) s;
×
169
    }
170

171
    int ansipos = 0;
19,282,233✔
172
    (*invis) = 0;
19,282,233✔
173
    ucs4_t c;
174
    const uint32_t *rest = s;
19,282,233✔
175
    while ((rest = u32_next(&c, rest))) {
19,621,675✔
176
        if (ansipos == 0 && c == char_esc) {
19,621,673✔
177
            /* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
178
            (*invis)++;
63,404✔
179
            ansipos++;
63,404✔
180
        } else if (ansipos == 1 && (c == '[' || c == '(')) {
19,589,971!
181
            /* Found '[' char after ESC. A CSI sequence has started. */
182
            (*invis)++;
63,404✔
183
            ansipos++;
63,404✔
184
        } else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) { /* between '@' and '_' (mostly uppercase letters) */
19,526,567!
185
            /* Found a byte designating the end of a two-byte escape sequence */
186
            (*invis)++;
×
187
            ansipos = 0;
×
188
            break;
×
189
        } else if (ansipos == 2) {
19,494,865✔
190
            /* Inside CSI sequence - Keep counting chars as invisible */
191
            (*invis)++;
276,036✔
192

193
            /* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
194
            if (c >= 0x40 && c <= 0x7e) {
276,036✔
195
                ansipos = 0;
63,402✔
196
                break;
63,402✔
197
            }
198
        } else {
106,317✔
199
            break;
19,218,829✔
200
        }
201
    }
202
    if (rest == NULL) {
19,282,233✔
203
        rest = s + u32_strlen(s);
2✔
204
    }
1✔
205
    return (uint32_t *) rest;
19,282,233✔
206
}
9,641,118✔
207

208

209

210
uint32_t *u32_strconv_from_input(const char *src)
5,262✔
211
{
212
    return u32_strconv_from_arg(src, encoding);
5,262✔
213
}
214

215

216

217
uint32_t *u32_strconv_from_arg(const char *src, const char *sourceEncoding)
594,580✔
218
{
219
    if (src == NULL) {
594,580✔
220
        return NULL;
4✔
221
    }
222
    if (src[0] == '\0') {
594,576✔
223
        return new_empty_string32();
126✔
224
    }
225

226
    uint32_t *result = u32_strconv_from_encoding(
594,450✔
227
            src,                    /* the source string to convert */
297,225✔
228
            sourceEncoding,         /* the character encoding from which to convert */
297,225✔
229
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
230

231
    if (result == NULL) {
594,450!
232
        fprintf(stderr, "%s: failed to convert from '%s' to UTF-32: %s\n", PROJECT, sourceEncoding, strerror(errno));
×
233
    }
234
    return result;
594,450✔
235
}
297,290✔
236

237

238

239
char *u32_strconv_to_output(const uint32_t *src)
6,366✔
240
{
241
    return u32_strconv_to_arg(src, encoding);
6,366✔
242
}
243

244

245

246
char *u32_strconv_to_arg(const uint32_t *src, const char *targetEncoding)
7,148✔
247
{
248
    if (src == NULL) {
7,148✔
249
        return NULL;
×
250
    }
251
    if (is_empty(src)) {
7,148✔
252
        return strdup("");
18✔
253
    }
254

255
    char *result = u32_strconv_to_encoding(
7,130✔
256
            src,                    /* the source string to convert */
3,565✔
257
            targetEncoding,         /* the character encoding to which to convert */
3,565✔
258
            iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
259

260
    if (result == NULL) {
7,130✔
261
        fprintf(stderr, "%s: failed to convert from UTF-32 to '%s': %s\n", PROJECT, targetEncoding, strerror(errno));
×
262
    }
263
    return result;
7,130✔
264
}
3,574✔
265

266

267

268
const char *check_encoding(const char *manual_encoding, const char *system_encoding)
774✔
269
{
270
    if (manual_encoding != NULL) {
774✔
271
        uint32_t *unicode = u32_strconv_from_encoding(" ", manual_encoding, iconveh_error);
26✔
272
        if (unicode != NULL) {
26✔
273
            BFREE(unicode);
24✔
274
            return manual_encoding;
24✔
275
        }
276
        fprintf(stderr, "%s: Invalid character encoding: %s - falling back to %s\n",
3✔
277
                PROJECT, manual_encoding, system_encoding);
1✔
278
        fflush(stderr);
2✔
279
    }
1✔
280
    return system_encoding;
750✔
281
}
387✔
282

283

284

285
char *to_utf8(uint32_t *src)
808✔
286
{
287
    if (src == NULL) {
808✔
288
        return NULL;
×
289
    }
290
    if (is_empty(src)) {
808!
291
        return (char *) strdup("");
×
292
    }
293
    char *result = u32_strconv_to_encoding(src, "UTF-8", iconveh_error);
808✔
294
    if (result == NULL) {
808!
295
        bx_fprintf(stderr, "%s: failed to convert a string to UTF-8: %s\n", PROJECT, strerror(errno));
×
296
        return NULL;
×
297
    }
298
    return result;
808✔
299
}
404✔
300

301

302

303
uint32_t *u32_nspaces(const size_t n)
3,502✔
304
{
305
    uint32_t *result = (uint32_t *) malloc((n + 1) * sizeof(uint32_t));
3,502✔
306
    if (result == NULL) {
3,502!
307
        perror(PROJECT);
×
308
        return NULL;
×
309
    }
310
    if (n > 0) {
3,502✔
311
        u32_set(result, char_space, n);
2,668✔
312
    }
1,334✔
313
    set_char_at(result, n, char_nul);
3,502✔
314
    return result;
3,502✔
315
}
1,751✔
316

317

318

319
/**
 * Find the last occurrence of `needle` in `haystack`, comparing at most
 * `needle_len` characters of the needle.
 *
 * @param haystack the string to search in, may be NULL or empty
 * @param needle the string to search for, may be NULL or empty
 * @param needle_len the number of characters of `needle` to compare
 * @return `haystack` itself if `needle` is NULL or empty; NULL if `haystack`
 *         is NULL or empty, or if there is no match; otherwise a pointer to
 *         the start of the last match within `haystack`
 */
uint32_t *u32_strnrstr(const uint32_t *haystack, const uint32_t *needle, const size_t needle_len)
{
    if (is_empty(needle)) {
        return (uint32_t *) haystack;
    }
    if (is_empty(haystack)) {
        return NULL;
    }

    /* No match can start after the last occurrence of the needle's first character. */
    uint32_t *p = u32_strrchr(haystack, needle[0]);
    if (!p) {
        return NULL;
    }

    /*
     * Walk backwards towards the start of the haystack. The loop stops at
     * `haystack` itself rather than decrementing past it, because forming a
     * pointer before the start of an array is undefined behavior.
     */
    for (;;) {
        if (u32_strncmp(p, needle, needle_len) == 0) {
            return p;
        }
        if (p == haystack) {
            break;
        }
        --p;
    }

    return NULL;
}
5,553✔
342

343

344

345
void u32_insert_space_at(uint32_t **s, const size_t idx, const size_t n)
8✔
346
{
347
    if (s == NULL || n == 0) {
8✔
348
        return;
4✔
349
    }
350

351
    size_t len = u32_strlen(*s);
4✔
352
    size_t x = idx;
4✔
353
    if (idx > len) {
4✔
354
        x = len;
2✔
355
    }
1✔
356

357
    *s = (uint32_t *) realloc(*s, (len + 1 + n) * sizeof(uint32_t));
4✔
358
    u32_move(*s + x + n, *s + x, len - x);
4✔
359
    u32_set(*s + x, char_space, n);
4✔
360
}
4✔
361

362

363
/* vim: set cindent sw=4: */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc